This commit is contained in:
2026-06-27 14:31:45 +08:00
parent 1df4010acc
commit 9828b1d44c
16 changed files with 323 additions and 23 deletions

View File

@@ -69,7 +69,13 @@ def build_models(
"""
client_kwargs = _resolve_openai_client_kwargs(judge_model, settings)
client = AsyncOpenAI(**client_kwargs)
llm = llm_factory(judge_model, client=client)
# RAGAS structured-output judge calls can be truncated by the upstream default
# 1024-token completion budget, especially for faithfulness and GPT-5 family models.
llm = llm_factory(
judge_model,
client=client,
max_tokens=max(1, int(settings.ragas_llm_max_tokens)),
)
embeddings = embedding_factory(provider="openai", model=embedding_model, client=client)
return llm, embeddings

View File

@@ -26,6 +26,11 @@ class EvaluationSettings(BaseSettings):
default="text-embedding-3-small",
alias="RAGAS_EMBEDDING_MODEL",
)
ragas_llm_max_tokens: int = Field(
default=4096,
alias="RAGAS_LLM_MAX_TOKENS",
gt=0,
)
openai_timeout_seconds: float = Field(default=30.0, alias="OPENAI_TIMEOUT_SECONDS")
ragas_metric_timeout_seconds: float = Field(default=45.0, alias="RAGAS_METRIC_TIMEOUT_SECONDS")
batch_size: int = Field(default=8, alias="BATCH_SIZE")