siemens_ragas/webapp/api/llm_profiles.py

"""CRUD routes for LLM profiles plus the scenario-patching apply endpoint."""

from __future__ import annotations

import logging
import time

from fastapi import APIRouter, HTTPException
from openai import OpenAI

from webapp.models import (
    CreateProfileRequest,
    LLMProfile,
    ProfileApplyRequest,
    ProfileApplyResponse,
    ProfileProbeRequest,
    ProfileTestResponse,
)
from webapp.services.profile_manager import profile_manager
from webapp.services.yaml_patcher import apply_profiles_to_scenario

router = APIRouter(prefix="/api/llm-profiles", tags=["llm-profiles"])
logger = logging.getLogger("webapp.api.llm_profiles")


# Substrings commonly found in embedding model names; a match routes the
# connectivity test to the /embeddings endpoint instead of /chat/completions.
_EMBEDDING_MODEL_KEYWORDS = (
    "embedding",
    "embed",
    "text-search",
    "text-similarity",
    "code-search",
    "ada-002",
)


def _is_embedding_model(model: str) -> bool:
    """Report whether *model* contains any known embedding keyword.

    The comparison is case-insensitive and purely name-based, so it is a
    heuristic rather than an authoritative capability check.
    """
    lowered = model.lower()
    for keyword in _EMBEDDING_MODEL_KEYWORDS:
        if keyword in lowered:
            return True
    return False


def _do_connectivity_test(
    model: str,
    base_url: str,
    api_key: str,
    timeout_seconds: int,
) -> ProfileTestResponse:
    """Send a minimal request and return the connectivity test result.

    - Embedding models (per ``_is_embedding_model``) → ``/embeddings`` with
      the short input ``"test"``.
    - Chat models → ``/chat/completions`` with ``max_tokens=8`` first. If the
      error text mentions both ``max_tokens`` and ``max_completion_tokens``
      (the parameter is rejected by the model), the request is retried once
      with ``max_completion_tokens=8``.

    Args:
        model: Model name sent to the endpoint.
        base_url: API base URL; trailing slashes are stripped.
        api_key: Credential passed to the OpenAI client.
        timeout_seconds: Request timeout handed to the client, in seconds.

    Returns:
        A ``ProfileTestResponse`` whose ``ok`` reflects whether a request
        succeeded, whose ``message`` is a success text or the stringified
        exception, and whose ``latency_ms`` is the elapsed time since the
        client was built (covering the retry, if one happened).

    Exceptions raised by the API calls are caught and reported in the
    response; errors raised while constructing the client still propagate.
    """
    client = OpenAI(
        api_key=api_key,
        base_url=base_url.rstrip("/"),
        timeout=float(timeout_seconds),
    )
    t0 = time.monotonic()

    def _result(ok: bool, message: str) -> ProfileTestResponse:
        # Latency is taken at the moment the outcome is known.
        latency_ms = int((time.monotonic() - t0) * 1000)
        return ProfileTestResponse(ok=ok, message=message, latency_ms=latency_ms)

    probe_messages = [{"role": "user", "content": "hi"}]

    # A fresh client is created per test; close it deterministically so its
    # HTTP connection pool does not linger until garbage collection.
    with client:
        if _is_embedding_model(model):
            # Embedding models must be probed through the /embeddings endpoint.
            try:
                client.embeddings.create(model=model, input="test")
            except Exception as exc:  # noqa: BLE001
                return _result(False, str(exc))
            return _result(True, "连接成功（embedding）")

        # Chat models: 8 tokens keeps the probe cheap while avoiding the
        # min-output errors that max_tokens=1 triggers on some models (gpt-5.x).
        try:
            client.chat.completions.create(
                model=model,
                messages=probe_messages,
                max_tokens=8,
            )
        except Exception as exc:  # noqa: BLE001
            err_str = str(exc)
            # Only an error naming both parameters indicates that max_tokens
            # itself is unsupported; anything else (auth, timeout, ...) is final.
            if not ("max_tokens" in err_str and "max_completion_tokens" in err_str):
                return _result(False, err_str)
        else:
            return _result(True, "连接成功")

        # Retry once with the parameter name newer models require.
        try:
            client.chat.completions.create(
                model=model,
                messages=probe_messages,
                max_completion_tokens=8,
            )
        except Exception as exc:  # noqa: BLE001
            return _result(False, str(exc))
        return _result(True, "连接成功")


@router.post("/probe", response_model=ProfileTestResponse, tags=["llm-profiles"])
def probe_connectivity(request: ProfileProbeRequest) -> ProfileTestResponse:
    """Run the connectivity test using credentials supplied in the request body.

    No saved profile is looked up; the model, base URL, API key and timeout
    all come straight from ``request``. The attempt and its outcome are logged.
    """
    model_name = request.model
    endpoint = request.base_url
    logger.info("[probe] model=%s  base_url=%s", model_name, endpoint)

    outcome = _do_connectivity_test(
        model=model_name,
        base_url=endpoint,
        api_key=request.api_key,
        timeout_seconds=request.timeout_seconds,
    )

    logger.info(
        "[probe] ok=%s  latency=%sms  msg=%s",
        outcome.ok,
        outcome.latency_ms,
        outcome.message,
    )
    return outcome


@router.get("", response_model=dict)
def list_profiles() -> dict:
    """List every saved LLM profile under the ``profiles`` key."""
    saved = profile_manager.list_all()
    logger.info("[list_profiles] count=%d", len(saved))

    # Serialise each profile model to a plain dict for the response body.
    serialised = []
    for profile in saved:
        serialised.append(profile.model_dump())
    return {"profiles": serialised}


@router.post("", status_code=201, response_model=LLMProfile)
def create_profile(request: CreateProfileRequest) -> LLMProfile:
    """Persist a new LLM profile built from the request and return it.

    The API key is forwarded to the profile manager but deliberately kept out
    of the log lines.
    """
    logger.info(
        "[create_profile] name=%r  model=%s  base_url=%s",
        request.name,
        request.model,
        request.base_url,
    )

    created = profile_manager.create(
        name=request.name,
        model=request.model,
        base_url=request.base_url,
        api_key=request.api_key,
        timeout_seconds=request.timeout_seconds,
    )

    logger.info("[create_profile] created  id=%s", created.profile_id)
    return created


@router.put("/{profile_id}", response_model=LLMProfile)
def update_profile(profile_id: str, request: CreateProfileRequest) -> LLMProfile:
    """Update an existing LLM profile by id.

    Args:
        profile_id: Identifier of the profile to overwrite.
        request: New name, model, base URL, API key and timeout.

    Returns:
        The updated profile as returned by ``profile_manager.update``.

    Raises:
        HTTPException: 404 when the profile manager reports no such profile
            (``update`` returns ``None``).
    """
    logger.info("[update_profile] id=%s  name=%r  model=%s", profile_id, request.name, request.model)
    updated = profile_manager.update(
        profile_id=profile_id,
        name=request.name,
        model=request.model,
        base_url=request.base_url,
        api_key=request.api_key,
        timeout_seconds=request.timeout_seconds,
    )
    if updated is None:
        logger.warning("[update_profile] not found  id=%s", profile_id)
        raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
    # Invalidate scorer cache so next request picks up the new profile settings.
    # This stays best-effort (the update itself already succeeded), but a
    # failure is logged instead of silently swallowed so stale scorer settings
    # can be diagnosed.
    try:
        # Imported lazily, as in the original code (presumably to avoid an
        # import cycle or a hard dependency on the scorer — TODO confirm).
        from webapp.services.inline_scorer import inline_scorer
        inline_scorer.invalidate_cache()
    except Exception:  # noqa: BLE001
        logger.warning(
            "[update_profile] scorer cache invalidation failed  id=%s",
            profile_id,
            exc_info=True,
        )
    else:
        logger.info("[update_profile] scorer cache invalidated  id=%s", profile_id)
    logger.info("[update_profile] updated  id=%s", profile_id)
    return updated


@router.delete("/{profile_id}", response_model=dict)
def delete_profile(profile_id: str) -> dict:
    """Delete an LLM profile by id.

    Args:
        profile_id: Identifier of the profile to remove.

    Returns:
        ``{"deleted": True}`` once the profile manager confirms the deletion.

    Raises:
        HTTPException: 404 when ``profile_manager.delete`` returns a falsy
            value, i.e. nothing was deleted.
    """
    logger.info("[delete_profile] id=%s", profile_id)
    deleted = profile_manager.delete(profile_id)
    if not deleted:
        logger.warning("[delete_profile] not found  id=%s", profile_id)
        raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
    # Invalidate scorer cache in case the deleted profile was in use.
    # Best-effort: the deletion already happened, so a failure here is logged
    # rather than raised — but no longer silently discarded.
    try:
        # Imported lazily, as in the original code (presumably to avoid an
        # import cycle or a hard dependency on the scorer — TODO confirm).
        from webapp.services.inline_scorer import inline_scorer
        inline_scorer.invalidate_cache()
    except Exception:  # noqa: BLE001
        logger.warning(
            "[delete_profile] scorer cache invalidation failed  id=%s",
            profile_id,
            exc_info=True,
        )
    else:
        logger.info("[delete_profile] scorer cache invalidated  id=%s", profile_id)
    logger.info("[delete_profile] deleted  id=%s", profile_id)
    return {"deleted": True}


@router.post("/{profile_id}/test", response_model=ProfileTestResponse)
def test_profile(profile_id: str) -> ProfileTestResponse:
    """Run the connectivity test against the settings of a saved profile.

    Responds with 404 when no profile with ``profile_id`` exists; otherwise
    returns whatever ``_do_connectivity_test`` reports.
    """
    saved = profile_manager.get(profile_id)

    # Guard clause: nothing to test without a stored profile.
    if saved is None:
        logger.warning("[test_profile] not found  id=%s", profile_id)
        not_found_detail = f"Profile not found: {profile_id}"
        raise HTTPException(status_code=404, detail=not_found_detail)

    logger.info(
        "[test_profile] id=%s  model=%s  base_url=%s",
        profile_id,
        saved.model,
        saved.base_url,
    )
    outcome = _do_connectivity_test(
        model=saved.model,
        base_url=saved.base_url,
        api_key=saved.api_key,
        timeout_seconds=saved.timeout_seconds,
    )
    logger.info("[test_profile] ok=%s  latency=%sms", outcome.ok, outcome.latency_ms)
    return outcome


@router.post("/apply", response_model=ProfileApplyResponse)
def apply_profiles(request: ProfileApplyRequest) -> ProfileApplyResponse:
    """Write the selected LLM profiles and weights into a scenario YAML file.

    Each of the judge / answer / dataset roles is optional. A role whose
    profile id is given but cannot be found makes the request fail with
    HTTP 400 before anything is patched. The response echoes the scenario
    path together with the fields reported as patched.
    """
    logger.info(
        "[apply_profiles] scenario=%s  judge=%s  answer=%s  dataset=%s  metric_weights=%s  doc_weights=%s",
        request.scenario_path,
        request.judge_profile_id,
        request.answer_profile_id,
        request.dataset_profile_id,
        bool(request.metric_weights),
        bool(request.doc_weights),
    )

    # Role -> requested profile id, in the order the roles are resolved.
    requested_ids = {
        "judge": request.judge_profile_id,
        "answer": request.answer_profile_id,
        "dataset": request.dataset_profile_id,
    }

    role_profiles: dict[str, LLMProfile | None] = {}
    missing = []
    for role, pid in requested_ids.items():
        # Roles without an id are skipped (None) rather than looked up.
        resolved = profile_manager.get(pid) if pid else None
        role_profiles[role] = resolved
        if pid and resolved is None:
            missing.append(role)

    if missing:
        logger.warning("[apply_profiles] missing profiles for roles: %s", missing)
        raise HTTPException(
            status_code=400,
            detail=f"Profile(s) not found for roles: {', '.join(missing)}",
        )

    patched = apply_profiles_to_scenario(
        scenario_path=request.scenario_path,
        judge_profile=role_profiles["judge"],
        answer_profile=role_profiles["answer"],
        dataset_profile=role_profiles["dataset"],
        metric_weights=request.metric_weights,
        doc_weights=request.doc_weights,
    )
    logger.info("[apply_profiles] patched fields: %s", patched)

    return ProfileApplyResponse(
        scenario_path=request.scenario_path,
        patched_fields=patched,
    )
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								"""CRUD routes for LLM profiles plus the scenario-patching apply endpoint."""
 								from __future__ import annotations
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								import logging
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
+								import time
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								from fastapi import APIRouter, HTTPException
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
+								from openai import OpenAI
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
 								from webapp.models import (
 								    CreateProfileRequest,
 								    LLMProfile,
 								    ProfileApplyRequest,
 								    ProfileApplyResponse,
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
+								    ProfileProbeRequest,
 								    ProfileTestResponse,
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								)
 								from webapp.services.profile_manager import profile_manager
 								from webapp.services.yaml_patcher import apply_profiles_to_scenario
 								router = APIRouter(prefix="/api/llm-profiles", tags=["llm-profiles"])
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								logger = logging.getLogger("webapp.api.llm_profiles")
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
-												fix: use /embeddings endpoint for embedding models in connectivity test

text-embedding-* and other embedding models must call /embeddings not
/chat/completions. Added _is_embedding_model() heuristic that checks model
name keywords to route to the correct endpoint automatically.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 14:53:32 +08:00
+								# 常见 embedding 模型名称关键词，用于自动判断走 /embeddings 端点
 								_EMBEDDING_MODEL_KEYWORDS = (
 								    "embedding", "embed", "text-search", "text-similarity",
 								    "code-search", "ada-002",
 								)
 								def _is_embedding_model(model: str) -> bool:
 								    """Heuristic: return True if the model name looks like an embedding model."""
 								    return any(kw in model.lower() for kw in _EMBEDDING_MODEL_KEYWORDS)
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
+								def _do_connectivity_test(
 								    model: str,
 								    base_url: str,
 								    api_key: str,
 								    timeout_seconds: int,
 								) -> ProfileTestResponse:
-												fix: use /embeddings endpoint for embedding models in connectivity test

text-embedding-* and other embedding models must call /embeddings not
/chat/completions. Added _is_embedding_model() heuristic that checks model
name keywords to route to the correct endpoint automatically.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 14:53:32 +08:00
+								    """Send a minimal request and return the connectivity test result.
-												fix: support max_completion_tokens for newer models (gpt-5.x) in connectivity test

Newer OpenAI models (gpt-5.4 etc.) reject max_tokens and require
max_completion_tokens. Try max_completion_tokens first, fall back to
max_tokens for older models / compatible APIs.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 14:51:28 +08:00
-												fix: use /embeddings endpoint for embedding models in connectivity test

text-embedding-* and other embedding models must call /embeddings not
/chat/completions. Added _is_embedding_model() heuristic that checks model
name keywords to route to the correct endpoint automatically.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 14:53:32 +08:00
+								    - Embedding models → POST /embeddings with a short text
 								    - Chat models → POST /chat/completions, tries max_completion_tokens first
 								      (required by newer models like gpt-5.x), falls back to max_tokens.
-												fix: support max_completion_tokens for newer models (gpt-5.x) in connectivity test

Newer OpenAI models (gpt-5.4 etc.) reject max_tokens and require
max_completion_tokens. Try max_completion_tokens first, fall back to
max_tokens for older models / compatible APIs.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 14:51:28 +08:00
+								    """
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
+								    client = OpenAI(
 								        api_key=api_key,
 								        base_url=base_url.rstrip("/"),
 								        timeout=float(timeout_seconds),
 								    )
 								    t0 = time.monotonic()
-												fix: use /embeddings endpoint for embedding models in connectivity test

text-embedding-* and other embedding models must call /embeddings not
/chat/completions. Added _is_embedding_model() heuristic that checks model
name keywords to route to the correct endpoint automatically.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 14:53:32 +08:00
 								    if _is_embedding_model(model):
 								        # Embedding 模型走 /embeddings 端点
 								        try:
 								            client.embeddings.create(model=model, input="test")
 								            latency_ms = int((time.monotonic() - t0) * 1000)
 								            return ProfileTestResponse(ok=True, message="连接成功（embedding）", latency_ms=latency_ms)
 								        except Exception as exc:  # noqa: BLE001
 								            latency_ms = int((time.monotonic() - t0) * 1000)
 								            return ProfileTestResponse(ok=False, message=str(exc), latency_ms=latency_ms)
-												fix: use max_tokens=8 for chat model connectivity test

max_tokens=1 triggers 'min-output limit' errors on gpt-5.x models.
Using 8 tokens is still cheap but satisfies all known model minimums.
Falls back to max_completion_tokens=8 if max_tokens is not supported.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 15:03:27 +08:00
+								    # Chat 模型：先不限制 token（最兼容），超时/鉴权错误直接返回
 								    # 避免 max_tokens=1 对部分模型（gpt-5.x）触发 min-output 限制
 								    try:
 								        client.chat.completions.create(
 								            model=model,
 								            messages=[{"role": "user", "content": "hi"}],
 								            max_tokens=8,   # 足够小节省费用，同时满足各模型最小输出要求
 								        )
 								        latency_ms = int((time.monotonic() - t0) * 1000)
 								        return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms)
 								    except Exception as exc:  # noqa: BLE001
 								        err_str = str(exc)
 								        # 如果 max_tokens 不被支持，改用 max_completion_tokens 再试一次
 								        if "max_tokens" in err_str and "max_completion_tokens" in err_str:
 								            try:
 								                client.chat.completions.create(
 								                    model=model,
 								                    messages=[{"role": "user", "content": "hi"}],
 								                    max_completion_tokens=8,
 								                )
 								                latency_ms = int((time.monotonic() - t0) * 1000)
 								                return ProfileTestResponse(ok=True, message="连接成功", latency_ms=latency_ms)
 								            except Exception as exc2:  # noqa: BLE001
 								                latency_ms = int((time.monotonic() - t0) * 1000)
 								                return ProfileTestResponse(ok=False, message=str(exc2), latency_ms=latency_ms)
 								        latency_ms = int((time.monotonic() - t0) * 1000)
 								        return ProfileTestResponse(ok=False, message=err_str, latency_ms=latency_ms)
-												fix: use /embeddings endpoint for embedding models in connectivity test

text-embedding-* and other embedding models must call /embeddings not
/chat/completions. Added _is_embedding_model() heuristic that checks model
name keywords to route to the correct endpoint automatically.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 14:53:32 +08:00
-												fix: support max_completion_tokens for newer models (gpt-5.x) in connectivity test

Newer OpenAI models (gpt-5.4 etc.) reject max_tokens and require
max_completion_tokens. Try max_completion_tokens first, fall back to
max_tokens for older models / compatible APIs.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 14:51:28 +08:00
+								    latency_ms = int((time.monotonic() - t0) * 1000)
 								    return ProfileTestResponse(ok=False, message="连接测试失败", latency_ms=latency_ms)
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
 								@router.post("/probe", response_model=ProfileTestResponse, tags=["llm-profiles"])
 								def probe_connectivity(request: ProfileProbeRequest) -> ProfileTestResponse:
 								    """Test LLM connectivity with inline credentials (no saved profile required)."""
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								    logger.info("[probe] model=%s  base_url=%s", request.model, request.base_url)
 								    result = _do_connectivity_test(
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
+								        model=request.model,
 								        base_url=request.base_url,
 								        api_key=request.api_key,
 								        timeout_seconds=request.timeout_seconds,
 								    )
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								    logger.info("[probe] ok=%s  latency=%sms  msg=%s", result.ok, result.latency_ms, result.message)
 								    return result
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								@router.get("", response_model=dict)
 								def list_profiles() -> dict:
 								    """Return all saved LLM profiles."""
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								    profiles = profile_manager.list_all()
 								    logger.info("[list_profiles] count=%d", len(profiles))
 								    return {"profiles": [p.model_dump() for p in profiles]}
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
 								@router.post("", status_code=201, response_model=LLMProfile)
 								def create_profile(request: CreateProfileRequest) -> LLMProfile:
 								    """Create a new LLM profile."""
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								    logger.info("[create_profile] name=%r  model=%s  base_url=%s", request.name, request.model, request.base_url)
 								    profile = profile_manager.create(
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								        name=request.name,
 								        model=request.model,
 								        base_url=request.base_url,
 								        api_key=request.api_key,
 								        timeout_seconds=request.timeout_seconds,
 								    )
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								    logger.info("[create_profile] created  id=%s", profile.profile_id)
 								    return profile
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
 								@router.put("/{profile_id}", response_model=LLMProfile)
 								def update_profile(profile_id: str, request: CreateProfileRequest) -> LLMProfile:
 								    """Update an existing LLM profile by id."""
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								    logger.info("[update_profile] id=%s  name=%r  model=%s", profile_id, request.name, request.model)
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								    updated = profile_manager.update(
 								        profile_id=profile_id,
 								        name=request.name,
 								        model=request.model,
 								        base_url=request.base_url,
 								        api_key=request.api_key,
 								        timeout_seconds=request.timeout_seconds,
 								    )
 								    if updated is None:
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								        logger.warning("[update_profile] not found  id=%s", profile_id)
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								        raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
-												fix(llm): resolve score runtime config from saved profiles

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-26 20:34:01 +08:00
+								    # Invalidate scorer cache so next request picks up the new profile settings.
 								    try:
 								        from webapp.services.inline_scorer import inline_scorer
 								        inline_scorer.invalidate_cache()
 								        logger.info("[update_profile] scorer cache invalidated  id=%s", profile_id)
 								    except Exception:  # noqa: BLE001
 								        pass
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								    logger.info("[update_profile] updated  id=%s", profile_id)
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								    return updated
 								@router.delete("/{profile_id}", response_model=dict)
 								def delete_profile(profile_id: str) -> dict:
 								    """Delete an LLM profile by id.

 								    Args:
 								        profile_id: Identifier of the profile to remove.

 								    Returns:
 								        ``{"deleted": True}`` once ``profile_manager.delete`` reports success.

 								    Raises:
 								        HTTPException: 404 when ``profile_manager.delete`` returns a falsy value.
 								    """
 								    logger.info("[delete_profile] id=%s", profile_id)
 								    deleted = profile_manager.delete(profile_id)
 								    if not deleted:
 								        logger.warning("[delete_profile] not found  id=%s", profile_id)
 								        raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
 								    # Invalidate scorer cache in case the deleted profile was in use.
 								    # This stays best-effort: the profile is already gone, so a failure here
 								    # must not become an error response. It is logged instead of being
 								    # silently swallowed so a stale cache can be diagnosed.
 								    try:
 								        from webapp.services.inline_scorer import inline_scorer
 								        inline_scorer.invalidate_cache()
 								    except Exception:  # noqa: BLE001
 								        logger.warning(
 								            "[delete_profile] scorer cache invalidation failed  id=%s",
 								            profile_id,
 								            exc_info=True,
 								        )
 								    else:
 								        # Mirrors the success log emitted by update_profile.
 								        logger.info("[delete_profile] scorer cache invalidated  id=%s", profile_id)
 								    logger.info("[delete_profile] deleted  id=%s", profile_id)
 								    return {"deleted": True}
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
+								@router.post("/{profile_id}/test", response_model=ProfileTestResponse)
 								def test_profile(profile_id: str) -> ProfileTestResponse:
 								    """Test LLM connectivity for a saved profile."""
 								    profile = profile_manager.get(profile_id)
 								    if profile is None:
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								        logger.warning("[test_profile] not found  id=%s", profile_id)
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
+								        raise HTTPException(status_code=404, detail=f"Profile not found: {profile_id}")
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								    logger.info("[test_profile] id=%s  model=%s  base_url=%s", profile_id, profile.model, profile.base_url)
 								    result = _do_connectivity_test(
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
+								        model=profile.model,
 								        base_url=profile.base_url,
 								        api_key=profile.api_key,
 								        timeout_seconds=profile.timeout_seconds,
 								    )
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								    logger.info("[test_profile] ok=%s  latency=%sms", result.ok, result.latency_ms)
 								    return result
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								@router.post("/apply", response_model=ProfileApplyResponse)
 								def apply_profiles(request: ProfileApplyRequest) -> ProfileApplyResponse:
 								    """Patch selected LLM profiles into the target scenario YAML file."""
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								    logger.info(
 								        "[apply_profiles] scenario=%s  judge=%s  answer=%s  dataset=%s  metric_weights=%s  doc_weights=%s",
 								        request.scenario_path,
 								        request.judge_profile_id,
 								        request.answer_profile_id,
 								        request.dataset_profile_id,
 								        bool(request.metric_weights),
 								        bool(request.doc_weights),
 								    )
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								    role_profiles: dict[str, LLMProfile | None] = {
 								        "judge": profile_manager.get(request.judge_profile_id) if request.judge_profile_id else None,
 								        "answer": profile_manager.get(request.answer_profile_id) if request.answer_profile_id else None,
 								        "dataset": profile_manager.get(request.dataset_profile_id) if request.dataset_profile_id else None,
 								    }
 								    missing = [
 								        role
 								        for role, pid in [
 								            ("judge", request.judge_profile_id),
 								            ("answer", request.answer_profile_id),
 								            ("dataset", request.dataset_profile_id),
 								        ]
 								        if pid and role_profiles[role] is None
 								    ]
 								    if missing:
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								        logger.warning("[apply_profiles] missing profiles for roles: %s", missing)
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								        raise HTTPException(
 								            status_code=400,
 								            detail=f"Profile(s) not found for roles: {', '.join(missing)}",
 								        )
 								    patched = apply_profiles_to_scenario(
 								        scenario_path=request.scenario_path,
 								        judge_profile=role_profiles["judge"],
 								        answer_profile=role_profiles["answer"],
 								        dataset_profile=role_profiles["dataset"],
-												feat: yaml_patcher and ProfileApplyRequest support metric_weights and doc_weights

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-18 17:02:21 +08:00
+								        metric_weights=request.metric_weights,
 								        doc_weights=request.doc_weights,
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								    )
-												feat: add detailed logging to all API routes and global access log middleware

Each API module now logs:
- evaluations: trigger (scenario path, task_id), status polls, list
- runs: list (count), detail (run_id, metrics, sample counts)
- scenarios: list (total, valid, error counts)
- pipeline: submit (docs_path, models, max_docs), status polls, list
- llm_profiles: CRUD ops (name, model, id), probe/test (model, ok, latency), apply (patched fields)
- score: already had per-request logging

Global middleware (webapp.access logger):
- Every API request: METHOD path -> status (latency_ms) at INFO
- Static file requests demoted to DEBUG to reduce noise

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

											
										
										
											2026-06-23 10:35:00 +08:00
+								    logger.info("[apply_profiles] patched fields: %s", patched)
-												feat: add /api/llm-profiles CRUD router

											
										
										
											2026-06-16 16:18:40 +08:00
+								    return ProfileApplyResponse(
 								        scenario_path=request.scenario_path,
 								        patched_fields=patched,
 								    )