feat(logging): add structured evaluation logs for metric-level debugging
- pipeline.py: log each metric score/timeout/error with sample_id, elapsed time, and score value; log the NaN list per sample; log a progress counter (N/total) after each sample completes
- evaluator.py: log eval start, dataset counts, adapter enrichment progress (per-sample OK/FAIL with elapsed time), metric scoring summary, and per-metric NaN rate at end of run
- runner.py: _setup_logging() helper writes to stderr + optional file; noisy ragas/httpx/openai loggers throttled to WARNING
- main.py: add --log-file and --log-level CLI flags

Usage:
  python main.py --scenario scenarios/online/... --log-file logs/eval.log --log-level DEBUG

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
19
main.py
19
main.py
@@ -1,6 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from rag_eval.dataset_builder.runner import run_dataset_build
|
||||
from rag_eval.execution.runner import run_scenario
|
||||
@@ -18,18 +20,33 @@ def parse_args() -> argparse.Namespace:
|
||||
"--dataset-build-config",
|
||||
help="Path to a YAML dataset build config file.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--log-file",
|
||||
default=None,
|
||||
help="Write evaluation logs to this file (in addition to stderr). "
|
||||
"Example: logs/eval.log",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--log-level",
|
||||
default="INFO",
|
||||
choices=["DEBUG", "INFO", "WARNING", "ERROR"],
|
||||
help="Logging verbosity level (default: INFO). Use DEBUG for per-metric detail.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""Dispatch the CLI call to the requested workflow."""
|
||||
args = parse_args()
|
||||
log_level = getattr(logging, args.log_level.upper(), logging.INFO)
|
||||
log_file = Path(args.log_file) if args.log_file else None
|
||||
|
||||
if args.dataset_build_config:
|
||||
result = run_dataset_build(args.dataset_build_config)
|
||||
print(f"Completed dataset build: {result.artifact_paths.root_dir}")
|
||||
return
|
||||
|
||||
- result = run_scenario(args.scenario)
|
||||
+ result = run_scenario(args.scenario, log_file=log_file, log_level=log_level)
|
||||
print(f"Completed run: {result.scenario.output_dir}")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user