feat(advisor): add optimization advisor module

- rag_eval/advisor/: new package with rules engine, LLM analyzer, writer
- rules.py: 7-metric diagnostic rules (warning/critical thresholds, top-3 low samples)
- llm_analyzer.py: Chinese optimization report via judge_model, graceful fallback
- writer.py: writes optimization_advice.md + log summary
- __init__.py: run_advisor() entry point (no-op when optimization_advisor=False)
- Scenario.optimization_advisor: new bool field (default False)
- ScenarioModel: same field added; loader.py passes it through
- RunArtifactPaths.advice_md: new path field
- factory.py: build_models() now public; build_metric_pipeline() accepts pre-built llm/embeddings
- runner.py: lifts llm, passes to pipeline and advisor; calls run_advisor() at end
- siemens online YAML: optimization_advisor: true enabled
- tests: 9 rules tests + 6 writer tests, all pass
- docs: advisor section added to engine-flow.md and architecture.md

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -8,8 +8,9 @@ from pathlib import Path
|
||||
|
||||
from rag_eval.adapters.http import HttpAppAdapter
|
||||
from rag_eval.adapters.python import PythonFunctionAdapter
|
||||
from rag_eval.advisor import run_advisor
|
||||
from rag_eval.config.loader import load_scenario
|
||||
from rag_eval.metrics.factory import build_metric_pipeline
|
||||
from rag_eval.metrics.factory import build_models, build_metric_pipeline
|
||||
from rag_eval.reporting.writers import write_run_artifacts
|
||||
from rag_eval.settings import EvaluationSettings
|
||||
from rag_eval.shared.models import Scenario
|
||||
@@ -67,10 +68,17 @@ def run_scenario(
|
||||
logger.info("[runner] scenario loaded: name=%s mode=%s max_samples=%s",
|
||||
scenario.scenario_name, scenario.mode, scenario.runtime.max_samples)
|
||||
|
||||
# Build models once; reuse llm in both MetricPipeline and advisor.
|
||||
llm, embeddings = build_models(scenario.judge_model, scenario.embedding_model, settings)
|
||||
|
||||
adapter = build_adapter(scenario)
|
||||
pipeline = build_metric_pipeline(scenario, settings)
|
||||
pipeline = build_metric_pipeline(scenario, settings, llm=llm, embeddings=embeddings)
|
||||
evaluator = Evaluator(scenario=scenario, metric_pipeline=pipeline, app_adapter=adapter)
|
||||
result = evaluator.evaluate()
|
||||
write_run_artifacts(result)
|
||||
logger.info("[runner] artifacts written for run_id=%s", result.run_id)
|
||||
|
||||
# Optimization advisor — runs only if scenario.optimization_advisor is True.
|
||||
run_advisor(result, scenario, llm)
|
||||
|
||||
return result
|
||||
|
||||
Reference in New Issue
Block a user