diff --git a/webapp/api/llm_profiles.py b/webapp/api/llm_profiles.py
index 0dfa1a0..cd71be3 100644
--- a/webapp/api/llm_profiles.py
+++ b/webapp/api/llm_profiles.py
@@ -29,24 +29,36 @@ def _do_connectivity_test(
     api_key: str,
     timeout_seconds: int,
 ) -> ProfileTestResponse:
-    """Send a minimal chat completion request and return the test result."""
+    """Send a minimal chat completion request and return the test result.
+
+    Tries max_completion_tokens first (required by newer OpenAI models like gpt-5.x),
+    then falls back to max_tokens for older models / compatible APIs.
+    """
     client = OpenAI(
         api_key=api_key,
         base_url=base_url.rstrip("/"),
         timeout=float(timeout_seconds),
     )
     t0 = time.monotonic()
-    try:
-        client.chat.completions.create(
-            model=model,
-            messages=[{"role": "user", "content": "hi"}],
-            max_tokens=1,
-        )
-        latency_ms = int((time.monotonic() - t0) * 1000)
-        return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms)
-    except Exception as exc:  # noqa: BLE001
-        latency_ms = int((time.monotonic() - t0) * 1000)
-        return ProfileTestResponse(ok=False, message=str(exc), latency_ms=latency_ms)
+    # Try newer parameter first, fall back to legacy max_tokens on failure
+    for kwargs in [{"max_completion_tokens": 1}, {"max_tokens": 1}]:
+        try:
+            client.chat.completions.create(
+                model=model,
+                messages=[{"role": "user", "content": "hi"}],
+                **kwargs,
+            )
+            latency_ms = int((time.monotonic() - t0) * 1000)
+            return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms)
+        except Exception as exc:  # noqa: BLE001
+            err_str = str(exc)
+            # Only retry if the error is specifically about the token parameter name
+            if "max_tokens" in err_str and "max_completion_tokens" in err_str and kwargs.get("max_completion_tokens"):
+                continue
+            latency_ms = int((time.monotonic() - t0) * 1000)
+            return ProfileTestResponse(ok=False, message=err_str, latency_ms=latency_ms)
+    latency_ms = int((time.monotonic() - t0) * 1000)
+    return ProfileTestResponse(ok=False, message="连接测试失败", latency_ms=latency_ms)
 
 
 @router.post("/probe", response_model=ProfileTestResponse, tags=["llm-profiles"])