From 1df4010accf2e4ae1785e4dd777f4290bbd12846 Mon Sep 17 00:00:00 2001
From: wangwei
Date: Fri, 26 Jun 2026 20:34:01 +0800
Subject: [PATCH] fix(llm): resolve score runtime config from saved profiles

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 rag_eval/metrics/factory.py           | 43 +++++++++++++++++++++++++++++++++++++++++--
 tests/webapp/test_llm_profiles_api.py | 11 ++++++++---
 tests/webapp/test_profile_manager.py  | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 webapp/api/llm_profiles.py            | 13 +++++++++++++
 webapp/services/inline_scorer.py      | 11 ++++++++++-
 5 files changed, 121 insertions(+), 6 deletions(-)

diff --git a/rag_eval/metrics/factory.py b/rag_eval/metrics/factory.py
index 4c2cbcd..df5683a 100644
--- a/rag_eval/metrics/factory.py
+++ b/rag_eval/metrics/factory.py
@@ -27,13 +27,52 @@ from ragas.metrics.collections import (
 from .pipeline import MetricPipeline
 
 
+def _resolve_openai_client_kwargs(
+    judge_model: str,
+    settings: EvaluationSettings,
+) -> dict[str, Any]:
+    """Return AsyncOpenAI kwargs, preferring a matching LLM Profile over .env settings.
+
+    Lookup order:
+    1. LLM Profile whose model name equals judge_model (exact match)
+    2. Fall back to EvaluationSettings (.env)
+    """
+    try:
+        # Lazy import to avoid circular dependency (webapp -> rag_eval is one-way).
+        from webapp.services.profile_manager import profile_manager
+        profiles = profile_manager.list_all()
+        for profile in profiles:
+            if profile.model == judge_model:
+                kwargs: dict[str, Any] = {
+                    "api_key": profile.api_key or "sk-placeholder",
+                    "timeout": float(profile.timeout_seconds or 30),
+                }
+                if profile.base_url and profile.base_url.strip():
+                    kwargs["base_url"] = profile.base_url.strip()
+                return kwargs
+    except Exception:  # noqa: BLE001
+        # Best-effort lookup: log the failure, then fall through to .env settings.
+        import logging
+
+        logging.getLogger(__name__).warning(
+            "LLM profile lookup failed; falling back to .env settings", exc_info=True
+        )
+
+    return settings.openai_client_kwargs
+
+
 def build_models(
     judge_model: str,
     embedding_model: str,
     settings: EvaluationSettings,
 ) -> tuple[Any, Any]:
-    """Create the LLM and embedding clients required by the selected RAGAS metrics."""
-    client = AsyncOpenAI(**settings.openai_client_kwargs)
+    """Create the LLM and embedding clients required by the selected RAGAS metrics.
+
+    Dynamically resolves connection settings from the stored LLM Profiles first
+    (matched by model name), falling back to .env settings when no profile matches.
+    """
+    client_kwargs = _resolve_openai_client_kwargs(judge_model, settings)
+    client = AsyncOpenAI(**client_kwargs)
     llm = llm_factory(judge_model, client=client)
     embeddings = embedding_factory(provider="openai", model=embedding_model, client=client)
     return llm, embeddings
diff --git a/tests/webapp/test_llm_profiles_api.py b/tests/webapp/test_llm_profiles_api.py
index 6903ec3..d131298 100644
--- a/tests/webapp/test_llm_profiles_api.py
+++ b/tests/webapp/test_llm_profiles_api.py
@@ -1,6 +1,7 @@
 """Integration tests for /api/llm-profiles endpoints."""
 import pytest
 from fastapi.testclient import TestClient
+from unittest.mock import patch
 
 
 @pytest.fixture()
@@ -41,19 +42,23 @@ def test_update_profile(client):
     pid = client.post("/api/llm-profiles", json=body).json()["profile_id"]
 
     upd = {"name": "New", "model": "m2", "base_url": "http://x/v1", "api_key": "k", "timeout_seconds": 60}
-    resp = client.put(f"/api/llm-profiles/{pid}", json=upd)
+    with patch("webapp.services.inline_scorer.inline_scorer.invalidate_cache") as invalidate:
+        resp = client.put(f"/api/llm-profiles/{pid}", json=upd)
     assert resp.status_code == 200
     assert resp.json()["name"] == "New"
     assert resp.json()["timeout_seconds"] == 60
+    invalidate.assert_called_once()
 
 
 def test_delete_profile(client):
     body = {"name": "Del", "model": "m", "base_url": "http://x/v1", "api_key": "k"}
     pid = client.post("/api/llm-profiles", json=body).json()["profile_id"]
-    resp = client.delete(f"/api/llm-profiles/{pid}")
+    with patch("webapp.services.inline_scorer.inline_scorer.invalidate_cache") as invalidate:
+        resp = client.delete(f"/api/llm-profiles/{pid}")
     assert resp.status_code == 200
     assert resp.json()["deleted"] is True
     assert len(client.get("/api/llm-profiles").json()["profiles"]) == 0
+    invalidate.assert_called_once()
 
 
 def test_update_nonexistent(client):
@@ -185,7 +190,7 @@ def test_apply_doc_weights_patches_yaml(tmp_path):
 # ---------------------------------------------------------------------------
 # Connectivity test endpoint tests
 # ---------------------------------------------------------------------------
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 
 def test_probe_connectivity_success(client):
diff --git a/tests/webapp/test_profile_manager.py b/tests/webapp/test_profile_manager.py
index a592823..9dc8655 100644
--- a/tests/webapp/test_profile_manager.py
+++ b/tests/webapp/test_profile_manager.py
@@ -98,3 +98,52 @@ def test_get_nonexistent(tmp_path):
 def test_delete_nonexistent(tmp_path):
     mgr = _make_manager(tmp_path)
     assert mgr.delete("does-not-exist") is False
+
+
+def test_resolve_openai_client_kwargs_prefers_matching_profile(tmp_path, monkeypatch):
+    """Metric runtime should prefer the saved LLM Profile over .env defaults."""
+    from rag_eval.metrics.factory import _resolve_openai_client_kwargs
+    from rag_eval.settings import EvaluationSettings
+    import webapp.services.profile_manager as pm_mod
+
+    mgr = _make_manager(tmp_path)
+    mgr.create(
+        name="Judge",
+        model="gpt-5.5",
+        base_url="http://39.107.88.131:13000",
+        api_key="sk-profile",
+        timeout_seconds=300,
+    )
+    monkeypatch.setattr(pm_mod, "profile_manager", mgr)
+
+    settings = EvaluationSettings(
+        OPENAI_API_KEY="sk-env",
+        OPENAI_BASE_URL="http://env-base/v1",
+        OPENAI_TIMEOUT_SECONDS=30,
+    )
+
+    kwargs = _resolve_openai_client_kwargs("gpt-5.5", settings)
+    assert kwargs["api_key"] == "sk-profile"
+    assert kwargs["base_url"] == "http://39.107.88.131:13000"
+    assert kwargs["timeout"] == 300.0
+
+
+def test_resolve_openai_client_kwargs_falls_back_to_env(tmp_path, monkeypatch):
+    """When no saved profile matches, .env settings remain the fallback."""
+    from rag_eval.metrics.factory import _resolve_openai_client_kwargs
+    from rag_eval.settings import EvaluationSettings
+    import webapp.services.profile_manager as pm_mod
+
+    mgr = _make_manager(tmp_path)
+    monkeypatch.setattr(pm_mod, "profile_manager", mgr)
+
+    settings = EvaluationSettings(
+        OPENAI_API_KEY="sk-env",
+        OPENAI_BASE_URL="http://env-base/v1",
+        OPENAI_TIMEOUT_SECONDS=45,
+    )
+
+    kwargs = _resolve_openai_client_kwargs("gpt-5", settings)
+    assert kwargs["api_key"] == "sk-env"
+    assert kwargs["base_url"] == "http://env-base/v1"
+    assert kwargs["timeout"] == 45.0
diff --git a/webapp/api/llm_profiles.py b/webapp/api/llm_profiles.py
index 02fff3c..4e92846 100644
--- a/webapp/api/llm_profiles.py
+++ b/webapp/api/llm_profiles.py
@@ -148,6 +148,13 @@ def update_profile(profile_id: str, request: CreateProfileRequest) -> LLMProfile
     if updated is None:
         logger.warning("[update_profile] not found id=%s", profile_id)
         raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
+    # Invalidate scorer cache so next request picks up the new profile settings.
+    try:
+        from webapp.services.inline_scorer import inline_scorer
+        inline_scorer.invalidate_cache()
+        logger.info("[update_profile] scorer cache invalidated id=%s", profile_id)
+    except Exception:  # noqa: BLE001
+        logger.exception("[update_profile] scorer cache invalidation failed id=%s", profile_id)
     logger.info("[update_profile] updated id=%s", profile_id)
     return updated
 
@@ -160,6 +167,12 @@ def delete_profile(profile_id: str) -> dict:
     if not deleted:
         logger.warning("[delete_profile] not found id=%s", profile_id)
         raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
+    # Invalidate scorer cache in case the deleted profile was in use.
+    try:
+        from webapp.services.inline_scorer import inline_scorer
+        inline_scorer.invalidate_cache()
+    except Exception:  # noqa: BLE001
+        logger.exception("[delete_profile] scorer cache invalidation failed id=%s", profile_id)
     logger.info("[delete_profile] deleted id=%s", profile_id)
     return {"deleted": True}
 
diff --git a/webapp/services/inline_scorer.py b/webapp/services/inline_scorer.py
index 9d0d5ed..0ee843f 100644
--- a/webapp/services/inline_scorer.py
+++ b/webapp/services/inline_scorer.py
@@ -54,13 +54,22 @@ class InlineScorer:
         self._model_cache: dict[tuple[str, str], tuple[Any, Any]] = {}
         self._lock = threading.Lock()
 
+    def invalidate_cache(self) -> None:
+        """Clear the model cache so the next call rebuilds clients from current profiles."""
+        with self._lock:
+            self._model_cache.clear()
+
     def _get_models(
         self,
         judge_model: str,
         embedding_model: str,
         settings: EvaluationSettings,
     ) -> tuple[Any, Any]:
-        """Return cached LLM/embedding clients, building them on first use."""
+        """Return cached LLM/embedding clients, building them on first use.
+
+        Cache is keyed by (judge_model, embedding_model). Call invalidate_cache()
+        after updating an LLM Profile to force a fresh client on the next request.
+        """
         cache_key = (judge_model, embedding_model)
         with self._lock:
             if cache_key not in self._model_cache: