From a781ba1e4ae54cbaa1babc7900c25a0fbaf0bc82 Mon Sep 17 00:00:00 2001 From: wangwei Date: Tue, 23 Jun 2026 15:29:01 +0800 Subject: [PATCH] config: set default judge_model=gpt-5, embedding_model=text-embedding-3-small gpt-5.4/5.5/5.2/5.4-mini/5.4-nano are incompatible with RAGAS 0.4.3 because they require max_completion_tokens instead of max_tokens. gpt-5 / gpt-4.1 support max_tokens and json_object mode required by RAGAS. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .env.example | 6 ++++-- rag_eval/settings.py | 4 ++-- webapp/api/score.py | 2 +- webapp/models.py | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.env.example b/.env.example index e430e6b..dc30131 100644 --- a/.env.example +++ b/.env.example @@ -8,8 +8,10 @@ OPENAI_BASE_URL=http://6.86.80.4:30080/v1 OPENAI_TIMEOUT_SECONDS=180 # 默认评测模型(可在场景 YAML 或 Web 控制台 LLM 配置中覆盖) -RAGAS_JUDGE_MODEL=deepseek-v4-flash -RAGAS_EMBEDDING_MODEL=text-embedding-v3 +# RAGAS_JUDGE_MODEL 需支持 max_tokens + json_object(gpt-5、gpt-4.1、gpt-4o 等) +# 注意:gpt-5.4/5.5/5.2 系列不支持 max_tokens,与 RAGAS 0.4.3 不兼容 +RAGAS_JUDGE_MODEL=gpt-5 +RAGAS_EMBEDDING_MODEL=text-embedding-3-small # 评估并发控制(启用 7 个指标时建议 RAGAS_METRIC_TIMEOUT_SECONDS=300) BATCH_SIZE=8 diff --git a/rag_eval/settings.py b/rag_eval/settings.py index ad1db52..750dc0f 100644 --- a/rag_eval/settings.py +++ b/rag_eval/settings.py @@ -21,9 +21,9 @@ class EvaluationSettings(BaseSettings): openai_api_key: str | None = Field(default=None, alias="OPENAI_API_KEY") openai_base_url: str = Field(default="http://6.86.80.4:30080/v1", alias="OPENAI_BASE_URL") - ragas_judge_model: str = Field(default="deepseek-v4-flash", alias="RAGAS_JUDGE_MODEL") + ragas_judge_model: str = Field(default="gpt-5", alias="RAGAS_JUDGE_MODEL") ragas_embedding_model: str = Field( - default="text-embedding-v3", + default="text-embedding-3-small", alias="RAGAS_EMBEDDING_MODEL", ) openai_timeout_seconds: float = Field(default=30.0, alias="OPENAI_TIMEOUT_SECONDS") diff --git a/webapp/api/score.py b/webapp/api/score.py index 4f2a303..6644d09 100644 --- a/webapp/api/score.py +++ b/webapp/api/score.py @@ -91,7 +91,7 @@ def score_sample( - `semantic_similarity` — 回答与参考答案的语义相似度(需 ground_truth) **推荐模型配置**: - - `judge_model`: `gpt-5.4` + - `judge_model`: `gpt-5` - `embedding_model`: `text-embedding-3-small` **鉴权**:若 `.env` 中配置了 `SCORE_API_TOKEN`,需在请求头携带 diff --git a/webapp/models.py b/webapp/models.py index 71a6801..89e6f48 100644 --- a/webapp/models.py +++ b/webapp/models.py @@ -420,7 +420,7 @@ class ScoreRequest(BaseModel): "context_recall", "context_precision", ], - "judge_model": "gpt-5.4", + "judge_model": "gpt-5", "embedding_model": "text-embedding-3-small", } }