Files
siemens_ragas/tests/webapp/test_profile_manager.py

206 lines
6.8 KiB
Python
Raw Permalink Normal View History

2026-06-16 16:10:37 +08:00
import pytest
2026-06-27 14:31:45 +08:00
from unittest.mock import sentinel
2026-06-16 16:10:37 +08:00
from webapp.models import LLMProfile, ProfileApplyRequest, ProfileApplyResponse
def test_llm_profile_defaults():
    """An LLMProfile built from the required fields has its defaults populated."""
    required_fields = {
        "profile_id": "abc",
        "name": "Test",
        "model": "gpt-4",
        "base_url": "http://localhost/v1",
        "api_key": "sk-test",
    }
    profile = LLMProfile(**required_fields)

    # None of these were passed in above, so they must come from the model.
    assert profile.timeout_seconds == 30
    assert profile.created_at != ""
    assert profile.updated_at != ""
def test_profile_apply_request_fields():
    """ProfileApplyRequest keeps the ids it was given, including an explicit None."""
    payload = {
        "scenario_path": "scenarios/offline/sample.yaml",
        "judge_profile_id": "id1",
        "answer_profile_id": "id2",
        "dataset_profile_id": None,
    }
    request = ProfileApplyRequest(**payload)

    assert request.judge_profile_id == "id1"
    assert request.dataset_profile_id is None
def test_profile_apply_response():
    """ProfileApplyResponse exposes the patched field names it was built with."""
    response = ProfileApplyResponse(
        scenario_path="scenarios/offline/sample.yaml",
        patched_fields=["judge_model"],
    )

    assert "judge_model" in response.patched_fields
# ---------------------------------------------------------------------------
# ProfileManager service tests
# ---------------------------------------------------------------------------
import json
from webapp.services.profile_manager import ProfileManager
def _make_manager(tmp_path):
    """Return a ProfileManager whose store is ``profiles.json`` under *tmp_path*."""
    return ProfileManager(store_path=tmp_path / "profiles.json")
def test_create_profile(tmp_path):
    """create() returns a profile with a non-empty id and the requested name."""
    manager = _make_manager(tmp_path)

    created = manager.create(
        name="Local",
        model="deepseek-v4-flash",
        base_url="http://localhost/v1",
        api_key="sk-x",
    )

    assert created.profile_id != ""
    assert created.name == "Local"
def test_list_profiles(tmp_path):
    """list_all() returns one entry per created profile."""
    manager = _make_manager(tmp_path)
    fixtures = (
        ("A", "m1", "http://a/v1", "k1"),
        ("B", "m2", "http://b/v1", "k2"),
    )
    for name, model, base_url, api_key in fixtures:
        manager.create(name=name, model=model, base_url=base_url, api_key=api_key)

    listed = manager.list_all()

    assert len(listed) == 2
def test_get_profile(tmp_path):
    """get() finds a profile by the id that create() handed back."""
    manager = _make_manager(tmp_path)
    stored = manager.create(
        name="X",
        model="m",
        base_url="http://x/v1",
        api_key="k",
    )

    looked_up = manager.get(stored.profile_id)

    assert looked_up is not None
    assert looked_up.name == "X"
def test_update_profile(tmp_path):
    """update() returns the profile with the new name, model and timeout."""
    manager = _make_manager(tmp_path)
    original = manager.create(
        name="Old",
        model="m",
        base_url="http://x/v1",
        api_key="k",
    )
    new_values = {
        "name": "New",
        "model": "m2",
        "base_url": "http://x/v1",
        "api_key": "k",
        "timeout_seconds": 60,
    }

    result = manager.update(original.profile_id, **new_values)

    assert result is not None
    assert result.name == "New"
    assert result.model == "m2"
    assert result.timeout_seconds == 60
def test_delete_profile(tmp_path):
    """delete() reports True for an existing profile, after which get() returns None."""
    manager = _make_manager(tmp_path)
    doomed = manager.create(
        name="Del",
        model="m",
        base_url="http://x/v1",
        api_key="k",
    )

    was_removed = manager.delete(doomed.profile_id)

    assert was_removed is True
    assert manager.get(doomed.profile_id) is None
def test_persistence(tmp_path):
    """A profile created by one manager is visible to a second manager on the same store."""
    store_file = tmp_path / "profiles.json"

    writer = ProfileManager(store_path=store_file)
    saved = writer.create(
        name="Persist",
        model="m",
        base_url="http://x/v1",
        api_key="k",
    )

    # A fresh instance pointed at the same path must see the saved profile.
    reader = ProfileManager(store_path=store_file)
    assert reader.get(saved.profile_id) is not None
def test_get_nonexistent(tmp_path):
    """get() returns None for an id that was never created."""
    manager = _make_manager(tmp_path)
    missing = manager.get("does-not-exist")
    assert missing is None
def test_delete_nonexistent(tmp_path):
    """delete() returns False for an id that was never created."""
    manager = _make_manager(tmp_path)
    outcome = manager.delete("does-not-exist")
    assert outcome is False
def test_resolve_openai_client_kwargs_prefers_matching_profile(tmp_path, monkeypatch):
    """Metric runtime should prefer the saved LLM Profile over .env defaults."""
    from rag_eval.metrics.factory import _resolve_openai_client_kwargs
    from rag_eval.settings import EvaluationSettings
    import webapp.services.profile_manager as pm_mod

    judge_model = "gpt-5.5"
    profile_base_url = "http://39.107.88.131:13000"

    # Save a profile for the judge model whose values all differ from the
    # settings built below, so the assertions show which source was used.
    manager = _make_manager(tmp_path)
    manager.create(
        name="Judge",
        model=judge_model,
        base_url=profile_base_url,
        api_key="sk-profile",
        timeout_seconds=300,
    )
    # Swap the module-level manager for the temporary one created above.
    monkeypatch.setattr(pm_mod, "profile_manager", manager)

    env_settings = EvaluationSettings(
        OPENAI_API_KEY="sk-env",
        OPENAI_BASE_URL="http://env-base/v1",
        OPENAI_TIMEOUT_SECONDS=30,
    )

    resolved = _resolve_openai_client_kwargs(judge_model, env_settings)

    assert resolved["api_key"] == "sk-profile"
    assert resolved["base_url"] == profile_base_url
    assert resolved["timeout"] == 300.0
def test_resolve_openai_client_kwargs_falls_back_to_env(tmp_path, monkeypatch):
    """When no saved profile matches, .env settings remain the fallback."""
    from rag_eval.metrics.factory import _resolve_openai_client_kwargs
    from rag_eval.settings import EvaluationSettings
    import webapp.services.profile_manager as pm_mod

    # No profiles are created on this manager, so nothing can match the model.
    empty_manager = _make_manager(tmp_path)
    monkeypatch.setattr(pm_mod, "profile_manager", empty_manager)

    env_settings = EvaluationSettings(
        OPENAI_API_KEY="sk-env",
        OPENAI_BASE_URL="http://env-base/v1",
        OPENAI_TIMEOUT_SECONDS=45,
    )

    resolved = _resolve_openai_client_kwargs("gpt-5", env_settings)

    assert resolved["api_key"] == "sk-env"
    assert resolved["base_url"] == "http://env-base/v1"
    assert resolved["timeout"] == 45.0
2026-06-27 14:31:45 +08:00
def test_build_models_uses_high_default_max_tokens_for_structured_judge(monkeypatch):
    """Structured RAGAS judge calls should use a larger completion budget by default.

    The OpenAI client, LLM factory and embedding factory on the metrics
    factory module are replaced with stand-ins, so the test only inspects the
    arguments ``build_models`` forwards to ``llm_factory``.
    """
    import rag_eval.metrics.factory as factory
    from rag_eval.settings import EvaluationSettings

    # This test pins the built-in default, so an override exported in the
    # developer's or CI shell must not leak into the bare EvaluationSettings().
    # NOTE(review): assumes EvaluationSettings reads RAGAS_LLM_MAX_TOKENS from
    # the process environment; a value set in a .env file is not covered here.
    monkeypatch.delenv("RAGAS_LLM_MAX_TOKENS", raising=False)

    captured: dict[str, object] = {}

    def fake_llm_factory(model, client=None, **kwargs):
        # Record everything build_models passed so it can be asserted below.
        captured["model"] = model
        captured["client"] = client
        captured["kwargs"] = kwargs
        return sentinel.llm

    monkeypatch.setattr(factory, "AsyncOpenAI", lambda **kwargs: sentinel.client)
    monkeypatch.setattr(factory, "llm_factory", fake_llm_factory)
    monkeypatch.setattr(factory, "embedding_factory", lambda **kwargs: sentinel.embeddings)

    llm, embeddings = factory.build_models(
        "gpt-5",
        "text-embedding-3-small",
        EvaluationSettings(),
    )

    assert llm is sentinel.llm
    assert embeddings is sentinel.embeddings
    assert captured["model"] == "gpt-5"
    assert captured["client"] is sentinel.client
    assert captured["kwargs"] == {"max_tokens": 4096}
def test_build_models_allows_env_override_for_judge_max_tokens(monkeypatch):
    """Operators should be able to raise the judge completion budget via settings."""
    import rag_eval.metrics.factory as factory
    from rag_eval.settings import EvaluationSettings

    captured: dict[str, object] = {}

    def fake_async_openai(**kwargs):
        return sentinel.client

    def fake_llm_factory(model, client=None, **kwargs):
        # Only the extra keyword arguments matter for this test.
        captured["kwargs"] = kwargs
        return sentinel.llm

    def fake_embedding_factory(**kwargs):
        return sentinel.embeddings

    monkeypatch.setattr(factory, "AsyncOpenAI", fake_async_openai)
    monkeypatch.setattr(factory, "llm_factory", fake_llm_factory)
    monkeypatch.setattr(factory, "embedding_factory", fake_embedding_factory)

    raised_budget = EvaluationSettings(RAGAS_LLM_MAX_TOKENS=8192)
    factory.build_models("gpt-5", "text-embedding-3-small", raised_budget)

    assert captured["kwargs"] == {"max_tokens": 8192}