update
This commit is contained in:
@@ -37,6 +37,9 @@ GROUPING_FIELDS = ("difficulty", "question_type", "language")
|
||||
# How many lowest-scoring samples to surface for manual review.
|
||||
LOWEST_SAMPLE_COUNT = 10
|
||||
|
||||
# Metrics where a lower raw value means stronger performance; they are inverted (1.0 - value) before per-sample averaging.
|
||||
LOWER_IS_BETTER_METRICS = {"noise_sensitivity"}
|
||||
|
||||
|
||||
def _round_or_none(value: float | None) -> float | None:
|
||||
"""Round a float to four places, mapping NaN/None to None for clean JSON."""
|
||||
@@ -105,7 +108,7 @@ def _groupings(frame: pd.DataFrame, metrics: list[str]) -> dict[str, list[GroupS
|
||||
def _sample_mean(row: pd.Series, metrics: list[str]) -> float | None:
|
||||
"""Average a single sample's available metric scores for ranking."""
|
||||
values = [
|
||||
float(row[metric])
|
||||
(1.0 - float(row[metric])) if metric in LOWER_IS_BETTER_METRICS else float(row[metric])
|
||||
for metric in metrics
|
||||
if metric in row and pd.notna(row[metric])
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user