feat(logging): add structured evaluation logs for metric-level debugging

- pipeline.py: log each metric score/timeout/error with sample_id,
  elapsed time, and score value; log the NaN list per sample; log a
  progress counter (N/total) after each sample completes
- evaluator.py: log eval start, dataset counts, adapter enrichment
  progress (per-sample OK/FAIL with elapsed), metric scoring summary,
  and per-metric NaN rate at end of run
- runner.py: _setup_logging() helper writes to stderr + optional file;
  ragas/httpx/openai noisy loggers throttled to WARNING
- main.py: add --log-file and --log-level CLI flags

Usage:
  python main.py --scenario scenarios/online/... --log-file logs/eval.log --log-level DEBUG

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-06-16 10:48:41 +08:00
parent 1ff4a3943a
commit 629304aa6d
4 changed files with 164 additions and 10 deletions

19
main.py
View File

@@ -1,6 +1,8 @@
from __future__ import annotations
import argparse
import logging
from pathlib import Path
from rag_eval.dataset_builder.runner import run_dataset_build
from rag_eval.execution.runner import run_scenario
@@ -18,18 +20,33 @@ def parse_args() -> argparse.Namespace:
"--dataset-build-config",
help="Path to a YAML dataset build config file.",
)
parser.add_argument(
"--log-file",
default=None,
help="Write evaluation logs to this file (in addition to stderr). "
"Example: logs/eval.log",
)
parser.add_argument(
"--log-level",
default="INFO",
choices=["DEBUG", "INFO", "WARNING", "ERROR"],
help="Logging verbosity level (default: INFO). Use DEBUG for per-metric detail.",
)
return parser.parse_args()
def main() -> None:
    """Dispatch the CLI call to the requested workflow.

    When ``--dataset-build-config`` is supplied, run the dataset build,
    print its artifact root directory and return. Otherwise run the
    evaluation scenario once, forwarding the ``--log-file`` and
    ``--log-level`` options to ``run_scenario``, and print the output
    directory of the run.
    """
    args = parse_args()

    # Translate the level name (e.g. "DEBUG") into the numeric constant
    # from the ``logging`` module; fall back to INFO if the name is not
    # an attribute of ``logging``.
    log_level = getattr(logging, args.log_level.upper(), logging.INFO)
    log_file = Path(args.log_file) if args.log_file else None

    if args.dataset_build_config:
        # NOTE(review): the logging options are not forwarded on this
        # path -- confirm that is intended.
        result = run_dataset_build(args.dataset_build_config)
        print(f"Completed dataset build: {result.artifact_paths.root_dir}")
        return

    # Run the scenario exactly once, with the logging options applied.
    # The earlier bare ``run_scenario(args.scenario)`` call was dropped:
    # it executed the whole scenario a second time without logging.
    result = run_scenario(args.scenario, log_file=log_file, log_level=log_level)
    print(f"Completed run: {result.scenario.output_dir}")