diff --git a/webapp/api/llm_profiles.py b/webapp/api/llm_profiles.py index 0dfa1a0..cd71be3 100644 --- a/webapp/api/llm_profiles.py +++ b/webapp/api/llm_profiles.py @@ -29,24 +29,36 @@ def _do_connectivity_test( api_key: str, timeout_seconds: int, ) -> ProfileTestResponse: - """Send a minimal chat completion request and return the test result.""" + """Send a minimal chat completion request and return the test result. + + Tries max_completion_tokens first (required by newer OpenAI models like gpt-5.x), + then falls back to max_tokens for older models / compatible APIs. + """ client = OpenAI( api_key=api_key, base_url=base_url.rstrip("/"), timeout=float(timeout_seconds), ) t0 = time.monotonic() - try: - client.chat.completions.create( - model=model, - messages=[{"role": "user", "content": "hi"}], - max_tokens=1, - ) - latency_ms = int((time.monotonic() - t0) * 1000) - return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms) - except Exception as exc: # noqa: BLE001 - latency_ms = int((time.monotonic() - t0) * 1000) - return ProfileTestResponse(ok=False, message=str(exc), latency_ms=latency_ms) + # Try newer parameter first, fall back to legacy max_tokens on failure + for kwargs in [{"max_completion_tokens": 1}, {"max_tokens": 1}]: + try: + client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": "hi"}], + **kwargs, + ) + latency_ms = int((time.monotonic() - t0) * 1000) + return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms) + except Exception as exc: # noqa: BLE001 + err_str = str(exc) + # Only retry if the error is specifically about the token parameter name + if "max_tokens" in err_str and "max_completion_tokens" in err_str and kwargs.get("max_completion_tokens"): + continue + latency_ms = int((time.monotonic() - t0) * 1000) + return ProfileTestResponse(ok=False, message=err_str, latency_ms=latency_ms) + latency_ms = int((time.monotonic() - t0) * 1000) + return ProfileTestResponse(ok=False, message="连接测试失败", latency_ms=latency_ms) @router.post("/probe", response_model=ProfileTestResponse, tags=["llm-profiles"])