68 lines
2.2 KiB
Python
68 lines
2.2 KiB
Python
|
|
"""Optimization advisor: rule-based diagnosis + LLM-powered recommendations."""
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import asyncio
|
||
|
|
import logging
|
||
|
|
from typing import Any
|
||
|
|
|
||
|
|
from rag_eval.reporting.artifacts import build_artifact_paths
|
||
|
|
from rag_eval.shared.models import EvaluationResult, Scenario
|
||
|
|
|
||
|
|
from .llm_analyzer import analyze
|
||
|
|
from .rules import Diagnosis, diagnose
|
||
|
|
from .writer import write_advice
|
||
|
|
|
||
|
|
# Logger shared by the advisor pipeline; records are emitted under the
# fixed name "rag_eval.advisor" rather than this module's __name__.
logger = logging.getLogger("rag_eval.advisor")

# Public API of this package: the run_advisor entry point defined below,
# plus Diagnosis and diagnose re-exported from the sibling .rules module.
__all__ = ["run_advisor", "Diagnosis", "diagnose"]
|
||
|
|
|
||
|
|
|
||
|
|
def _run_coroutine_blocking(make_coro: Any) -> Any:
    """Run the coroutine built by ``make_coro`` to completion and return its result.

    ``asyncio.run`` raises ``RuntimeError`` when an event loop is already
    running in the calling thread (e.g. notebooks or async request handlers).
    In that case the coroutine is driven by a fresh event loop on a
    short-lived worker thread, and the caller blocks until it finishes.

    Args:
        make_coro: Zero-argument callable returning the coroutine to run. The
            coroutine is created lazily, on the thread that awaits it, so it
            is never left un-awaited when the direct path is unavailable.

    Returns:
        Whatever the coroutine returns.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: the plain path is safe.
        return asyncio.run(make_coro())

    # Local import keeps the module's top-level import block untouched.
    from concurrent.futures import ThreadPoolExecutor

    # A loop is already running here, so asyncio.run() would raise. Use a
    # separate thread, which gets its own event loop.
    # NOTE(review): assumes the LLM client is not bound to the caller's loop —
    # if it is, the resulting error is still caught by run_advisor.
    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(lambda: asyncio.run(make_coro())).result()


def run_advisor(
    result: EvaluationResult,
    scenario: Scenario,
    llm: Any,
) -> None:
    """Run the full optimization advisor pipeline after an evaluation completes.

    Steps: resolve the artifact paths for the run, compute rule-based
    diagnoses from the score rows, request LLM analysis only when at least one
    diagnosis was produced, then write the advice file.

    Skips silently if scenario.optimization_advisor is falsy.
    Failures inside the pipeline never propagate — they are logged as
    warnings (with the traceback at DEBUG level), not raised.

    Works both from plain synchronous code and from a thread that already has
    a running asyncio event loop.

    Args:
        result: Completed EvaluationResult from Evaluator.evaluate().
        scenario: The resolved Scenario (provides metrics, judge_model, output_dir).
        llm: Pre-built RAGAS LLM instance (from build_models()) for LLM analysis.
    """
    if not scenario.optimization_advisor:
        return

    logger.info("[advisor] starting optimization analysis scenario=%s", scenario.scenario_name)

    try:
        artifact_paths = build_artifact_paths(scenario.output_dir, result.run_id)
        if artifact_paths.advice_md is None:
            logger.warning("[advisor] advice_md path not set in RunArtifactPaths — skipping")
            return

        diagnoses = diagnose(result.score_rows, scenario.metrics)
        logger.info("[advisor] rule diagnosis complete: %d metric(s) triggered", len(diagnoses))

        if diagnoses:
            # Build the coroutine lazily so it is created on the thread that
            # actually awaits it (see _run_coroutine_blocking).
            llm_markdown = _run_coroutine_blocking(
                lambda: analyze(diagnoses, llm, scenario.scenario_name)
            )
        else:
            # Nothing triggered: no LLM call, the advice file gets no LLM section.
            llm_markdown = ""

        write_advice(
            diagnoses=diagnoses,
            llm_markdown=llm_markdown,
            advice_path=artifact_paths.advice_md,
            scenario_name=scenario.scenario_name,
            run_id=result.run_id,
            judge_model=scenario.judge_model,
        )

    except Exception as exc:
        # Deliberate best-effort boundary: the advisor must not fail the
        # evaluation run that invoked it.
        logger.warning(
            "[advisor] advisor failed (%s: %s) — evaluation result is unaffected",
            type(exc).__name__, exc,
        )
        # Keep the warning line unchanged; expose the traceback only at DEBUG.
        logger.debug("[advisor] advisor failure traceback", exc_info=True)
|