fix: support max_completion_tokens for newer models (gpt-5.x) in connectivity test

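Newer OpenAI models (gpt-5.4 etc.) reject max_tokens and require
max_completion_tokens. The connectivity test now sends
max_completion_tokens first and retries once with max_tokens, for older
models / compatible APIs, only when the returned error message mentions
both parameter names; any other error is reported as a failure right away.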

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-06-23 14:51:28 +08:00
parent 1dc7ab9727
commit 05419db1f9

View File

@@ -29,24 +29,36 @@ def _do_connectivity_test(
api_key: str,
timeout_seconds: int,
) -> ProfileTestResponse:
"""Send a minimal chat completion request and return the test result."""
"""Send a minimal chat completion request and return the test result.
Tries max_completion_tokens first (required by newer OpenAI models like gpt-5.x),
then falls back to max_tokens for older models / compatible APIs.
"""
client = OpenAI(
api_key=api_key,
base_url=base_url.rstrip("/"),
timeout=float(timeout_seconds),
)
t0 = time.monotonic()
# Try newer parameter first, fall back to legacy max_tokens on failure
for kwargs in [{"max_completion_tokens": 1}, {"max_tokens": 1}]:
try:
client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": "hi"}],
max_tokens=1,
**kwargs,
)
latency_ms = int((time.monotonic() - t0) * 1000)
return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms)
except Exception as exc: # noqa: BLE001
err_str = str(exc)
# Only retry if the error is specifically about the token parameter name
if "max_tokens" in err_str and "max_completion_tokens" in err_str and kwargs.get("max_completion_tokens"):
continue
latency_ms = int((time.monotonic() - t0) * 1000)
return ProfileTestResponse(ok=False, message=str(exc), latency_ms=latency_ms)
return ProfileTestResponse(ok=False, message=err_str, latency_ms=latency_ms)
latency_ms = int((time.monotonic() - t0) * 1000)
return ProfileTestResponse(ok=False, message="连接测试失败", latency_ms=latency_ms)
@router.post("/probe", response_model=ProfileTestResponse, tags=["llm-profiles"])