fix(llm): resolve score runtime config from saved profiles
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -27,13 +27,48 @@ from ragas.metrics.collections import (
|
|||||||
from .pipeline import MetricPipeline
|
from .pipeline import MetricPipeline
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_openai_client_kwargs(
|
||||||
|
judge_model: str,
|
||||||
|
settings: EvaluationSettings,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""Return AsyncOpenAI kwargs, preferring a matching LLM Profile over .env settings.
|
||||||
|
|
||||||
|
Lookup order:
|
||||||
|
1. LLM Profile whose model name equals judge_model (exact match)
|
||||||
|
2. Fall back to EvaluationSettings (.env)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Lazy import to avoid circular dependency (webapp -> rag_eval is one-way).
|
||||||
|
from webapp.services.profile_manager import profile_manager
|
||||||
|
profiles = profile_manager.list_all()
|
||||||
|
for profile in profiles:
|
||||||
|
if profile.model == judge_model:
|
||||||
|
kwargs: dict[str, Any] = {
|
||||||
|
"api_key": profile.api_key or "sk-placeholder",
|
||||||
|
"timeout": float(profile.timeout_seconds or 30),
|
||||||
|
}
|
||||||
|
if profile.base_url and profile.base_url.strip():
|
||||||
|
kwargs["base_url"] = profile.base_url.strip()
|
||||||
|
return kwargs
|
||||||
|
except Exception: # noqa: BLE001
|
||||||
|
# If profile lookup fails for any reason, fall through to .env settings.
|
||||||
|
pass
|
||||||
|
|
||||||
|
return settings.openai_client_kwargs
|
||||||
|
|
||||||
|
|
||||||
def build_models(
    judge_model: str,
    embedding_model: str,
    settings: EvaluationSettings,
) -> tuple[Any, Any]:
    """Build the judge LLM and embedding clients needed by the selected RAGAS metrics.

    Connection settings are resolved by ``_resolve_openai_client_kwargs`` using
    ``judge_model`` and ``settings``. A single ``AsyncOpenAI`` client is created
    from them and shared by both returned objects.

    Returns:
        A ``(llm, embeddings)`` tuple.
    """
    # NOTE(review): the embedding client reuses the connection resolved for the
    # judge model -- confirm both models are served by the same endpoint.
    shared_client = AsyncOpenAI(**_resolve_openai_client_kwargs(judge_model, settings))
    judge_llm = llm_factory(judge_model, client=shared_client)
    embedder = embedding_factory(
        provider="openai",
        model=embedding_model,
        client=shared_client,
    )
    return judge_llm, embedder
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
"""Integration tests for /api/llm-profiles endpoints."""
|
"""Integration tests for /api/llm-profiles endpoints."""
|
||||||
import pytest
|
import pytest
|
||||||
from fastapi.testclient import TestClient
|
from fastapi.testclient import TestClient
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
@@ -41,19 +42,23 @@ def test_update_profile(client):
|
|||||||
pid = client.post("/api/llm-profiles", json=body).json()["profile_id"]
|
pid = client.post("/api/llm-profiles", json=body).json()["profile_id"]
|
||||||
|
|
||||||
upd = {"name": "New", "model": "m2", "base_url": "http://x/v1", "api_key": "k", "timeout_seconds": 60}
|
upd = {"name": "New", "model": "m2", "base_url": "http://x/v1", "api_key": "k", "timeout_seconds": 60}
|
||||||
|
with patch("webapp.services.inline_scorer.inline_scorer.invalidate_cache") as invalidate:
|
||||||
resp = client.put(f"/api/llm-profiles/{pid}", json=upd)
|
resp = client.put(f"/api/llm-profiles/{pid}", json=upd)
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
assert resp.json()["name"] == "New"
|
assert resp.json()["name"] == "New"
|
||||||
assert resp.json()["timeout_seconds"] == 60
|
assert resp.json()["timeout_seconds"] == 60
|
||||||
|
invalidate.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
def test_delete_profile(client):
    """Deleting a profile returns ``deleted: True``, empties the list and clears the scorer cache."""
    payload = {"name": "Del", "model": "m", "base_url": "http://x/v1", "api_key": "k"}
    created = client.post("/api/llm-profiles", json=payload)
    profile_id = created.json()["profile_id"]

    # Patch the scorer so the cache invalidation triggered by the endpoint is observable.
    with patch("webapp.services.inline_scorer.inline_scorer.invalidate_cache") as invalidate:
        response = client.delete(f"/api/llm-profiles/{profile_id}")

    assert response.status_code == 200
    assert response.json()["deleted"] is True
    remaining = client.get("/api/llm-profiles").json()["profiles"]
    assert len(remaining) == 0
    invalidate.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
def test_update_nonexistent(client):
|
def test_update_nonexistent(client):
|
||||||
@@ -185,7 +190,7 @@ def test_apply_doc_weights_patches_yaml(tmp_path):
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Connectivity test endpoint tests
|
# Connectivity test endpoint tests
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
|
||||||
def test_probe_connectivity_success(client):
|
def test_probe_connectivity_success(client):
|
||||||
|
|||||||
@@ -98,3 +98,52 @@ def test_get_nonexistent(tmp_path):
|
|||||||
def test_delete_nonexistent(tmp_path):
    """Deleting an unknown profile id reports ``False``."""
    manager = _make_manager(tmp_path)
    outcome = manager.delete("does-not-exist")
    assert outcome is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_openai_client_kwargs_prefers_matching_profile(tmp_path, monkeypatch):
    """Metric runtime should prefer the saved LLM Profile over .env defaults."""
    from rag_eval.metrics.factory import _resolve_openai_client_kwargs
    from rag_eval.settings import EvaluationSettings
    import webapp.services.profile_manager as pm_mod

    # Use a reserved example hostname rather than a routable address, so the
    # test data cannot point at (or leak) a real server.
    profile_base_url = "http://profile-base.example:13000"

    mgr = _make_manager(tmp_path)
    mgr.create(
        name="Judge",
        model="gpt-5.5",
        base_url=profile_base_url,
        api_key="sk-profile",
        timeout_seconds=300,
    )
    # The resolver imports ``profile_manager`` lazily from this module, so
    # swapping the module attribute is enough to redirect the lookup.
    monkeypatch.setattr(pm_mod, "profile_manager", mgr)

    settings = EvaluationSettings(
        OPENAI_API_KEY="sk-env",
        OPENAI_BASE_URL="http://env-base/v1",
        OPENAI_TIMEOUT_SECONDS=30,
    )

    kwargs = _resolve_openai_client_kwargs("gpt-5.5", settings)
    assert kwargs["api_key"] == "sk-profile"
    assert kwargs["base_url"] == profile_base_url
    assert kwargs["timeout"] == 300.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_openai_client_kwargs_falls_back_to_env(tmp_path, monkeypatch):
    """With no saved profile for the model, the .env settings are used."""
    from rag_eval.metrics.factory import _resolve_openai_client_kwargs
    from rag_eval.settings import EvaluationSettings
    import webapp.services.profile_manager as pm_mod

    # A manager with no profiles created in this test stands in for the shared one.
    empty_manager = _make_manager(tmp_path)
    monkeypatch.setattr(pm_mod, "profile_manager", empty_manager)

    env_settings = EvaluationSettings(
        OPENAI_API_KEY="sk-env",
        OPENAI_BASE_URL="http://env-base/v1",
        OPENAI_TIMEOUT_SECONDS=45,
    )

    resolved = _resolve_openai_client_kwargs("gpt-5", env_settings)

    assert resolved["api_key"] == "sk-env"
    assert resolved["base_url"] == "http://env-base/v1"
    assert resolved["timeout"] == 45.0
|
||||||
|
|||||||
@@ -148,6 +148,13 @@ def update_profile(profile_id: str, request: CreateProfileRequest) -> LLMProfile
|
|||||||
if updated is None:
|
if updated is None:
|
||||||
logger.warning("[update_profile] not found id=%s", profile_id)
|
logger.warning("[update_profile] not found id=%s", profile_id)
|
||||||
raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
|
raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
|
||||||
|
# Invalidate scorer cache so next request picks up the new profile settings.
|
||||||
|
try:
|
||||||
|
from webapp.services.inline_scorer import inline_scorer
|
||||||
|
inline_scorer.invalidate_cache()
|
||||||
|
logger.info("[update_profile] scorer cache invalidated id=%s", profile_id)
|
||||||
|
except Exception: # noqa: BLE001
|
||||||
|
pass
|
||||||
logger.info("[update_profile] updated id=%s", profile_id)
|
logger.info("[update_profile] updated id=%s", profile_id)
|
||||||
return updated
|
return updated
|
||||||
|
|
||||||
@@ -160,6 +167,12 @@ def delete_profile(profile_id: str) -> dict:
|
|||||||
if not deleted:
|
if not deleted:
|
||||||
logger.warning("[delete_profile] not found id=%s", profile_id)
|
logger.warning("[delete_profile] not found id=%s", profile_id)
|
||||||
raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
|
raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
|
||||||
|
# Invalidate scorer cache in case the deleted profile was in use.
|
||||||
|
try:
|
||||||
|
from webapp.services.inline_scorer import inline_scorer
|
||||||
|
inline_scorer.invalidate_cache()
|
||||||
|
except Exception: # noqa: BLE001
|
||||||
|
pass
|
||||||
logger.info("[delete_profile] deleted id=%s", profile_id)
|
logger.info("[delete_profile] deleted id=%s", profile_id)
|
||||||
return {"deleted": True}
|
return {"deleted": True}
|
||||||
|
|
||||||
|
|||||||
@@ -54,13 +54,22 @@ class InlineScorer:
|
|||||||
self._model_cache: dict[tuple[str, str], tuple[Any, Any]] = {}
|
self._model_cache: dict[tuple[str, str], tuple[Any, Any]] = {}
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
|
|
||||||
|
def invalidate_cache(self) -> None:
    """Drop every cached model entry while holding the instance lock.

    The cache dict is emptied in place.
    """
    cache = self._model_cache
    with self._lock:
        cache.clear()
|
||||||
|
|
||||||
def _get_models(
|
def _get_models(
|
||||||
self,
|
self,
|
||||||
judge_model: str,
|
judge_model: str,
|
||||||
embedding_model: str,
|
embedding_model: str,
|
||||||
settings: EvaluationSettings,
|
settings: EvaluationSettings,
|
||||||
) -> tuple[Any, Any]:
|
) -> tuple[Any, Any]:
|
||||||
"""Return cached LLM/embedding clients, building them on first use."""
|
"""Return cached LLM/embedding clients, building them on first use.
|
||||||
|
|
||||||
|
Cache is keyed by (judge_model, embedding_model). Call invalidate_cache()
|
||||||
|
after updating an LLM Profile to force a fresh client on the next request.
|
||||||
|
"""
|
||||||
cache_key = (judge_model, embedding_model)
|
cache_key = (judge_model, embedding_model)
|
||||||
with self._lock:
|
with self._lock:
|
||||||
if cache_key not in self._model_cache:
|
if cache_key not in self._model_cache:
|
||||||
|
|||||||
Reference in New Issue
Block a user