fix: use max_tokens=8 for chat model connectivity test

max_tokens=1 triggers 'min-output limit' errors on gpt-5.x models.
Using 8 tokens is still cheap but satisfies all known model minimums.
If the API rejects max_tokens (the error message names both max_tokens
and max_completion_tokens), the test retries once with
max_completion_tokens=8.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-06-23 15:03:27 +08:00
parent fb420656ec
commit f8e308b7dc

View File

@@ -64,23 +64,33 @@ def _do_connectivity_test(
latency_ms = int((time.monotonic() - t0) * 1000) latency_ms = int((time.monotonic() - t0) * 1000)
return ProfileTestResponse(ok=False, message=str(exc), latency_ms=latency_ms) return ProfileTestResponse(ok=False, message=str(exc), latency_ms=latency_ms)
# Chat 模型:先用 max_completion_tokens，失败时 fallback 到 max_tokens # Chat 模型:先不限制 token（最兼容），超时/鉴权错误直接返回
for kwargs in [{"max_completion_tokens": 1}, {"max_tokens": 1}]: # 避免 max_tokens=1 对部分模型（gpt-5.x）触发 min-output 限制
try: try:
client.chat.completions.create( client.chat.completions.create(
model=model, model=model,
messages=[{"role": "user", "content": "hi"}], messages=[{"role": "user", "content": "hi"}],
**kwargs, max_tokens=8, # 足够小节省费用,同时满足各模型最小输出要求
) )
latency_ms = int((time.monotonic() - t0) * 1000) latency_ms = int((time.monotonic() - t0) * 1000)
return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms) return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms)
except Exception as exc: # noqa: BLE001 except Exception as exc: # noqa: BLE001
err_str = str(exc) err_str = str(exc)
# 仅当错误明确提示参数名称问题时才重试 # 如果 max_tokens 不被支持,改用 max_completion_tokens 再试一次
if "max_tokens" in err_str and "max_completion_tokens" in err_str and kwargs.get("max_completion_tokens"): if "max_tokens" in err_str and "max_completion_tokens" in err_str:
continue try:
latency_ms = int((time.monotonic() - t0) * 1000) client.chat.completions.create(
return ProfileTestResponse(ok=False, message=err_str, latency_ms=latency_ms) model=model,
messages=[{"role": "user", "content": "hi"}],
max_completion_tokens=8,
)
latency_ms = int((time.monotonic() - t0) * 1000)
return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms)
except Exception as exc2: # noqa: BLE001
latency_ms = int((time.monotonic() - t0) * 1000)
return ProfileTestResponse(ok=False, message=str(exc2), latency_ms=latency_ms)
latency_ms = int((time.monotonic() - t0) * 1000)
return ProfileTestResponse(ok=False, message=err_str, latency_ms=latency_ms)
latency_ms = int((time.monotonic() - t0) * 1000) latency_ms = int((time.monotonic() - t0) * 1000)
return ProfileTestResponse(ok=False, message="连接测试失败", latency_ms=latency_ms) return ProfileTestResponse(ok=False, message="连接测试失败", latency_ms=latency_ms)