update
This commit is contained in:
@@ -199,6 +199,7 @@ code {
|
||||
.metric-value.bad { color: var(--bad); }
|
||||
.metric-value.na { color: var(--slate-light); }
|
||||
.metric-name { font-size: 12px; color: var(--slate); margin-top: 4px; }
|
||||
.metric-desc { font-size: 12px; color: #64748b; margin-top: 6px; line-height: 1.45; }
|
||||
|
||||
.report-row { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; }
|
||||
.report-half { margin-bottom: 0; }
|
||||
|
||||
@@ -267,6 +267,7 @@
|
||||
</div>
|
||||
|
||||
<script src="/static/js/api.js"></script>
|
||||
<script src="/static/js/metric_presenter.js"></script>
|
||||
<script src="/static/js/report.js"></script>
|
||||
<script src="/static/js/profiles.js"></script>
|
||||
<script src="/static/js/runner.js"></script>
|
||||
|
||||
@@ -147,7 +147,7 @@ const App = {
|
||||
const chips = (run.metrics || [])
|
||||
.map((m) => {
|
||||
const val = run.metric_means ? run.metric_means[m] : null;
|
||||
const cls = App.scoreClass(val);
|
||||
const cls = App.scoreClass(m, val);
|
||||
const text = val === null || val === undefined ? "n/a" : val.toFixed(2);
|
||||
return `<span class="metric-chip" title="${App.escape(m)}">${App.escape(App.shortMetric(m))} <b class="${cls}">${text}</b></span>`;
|
||||
})
|
||||
@@ -174,11 +174,8 @@ const App = {
|
||||
if (btn) btn.disabled = false;
|
||||
},
|
||||
|
||||
scoreClass(value) {
|
||||
if (value === null || value === undefined) return "na";
|
||||
if (value >= 0.8) return "good";
|
||||
if (value >= 0.65) return "warn";
|
||||
return "bad";
|
||||
scoreClass(metricName, value) {
|
||||
return MetricPresenter.scoreClass(metricName, value);
|
||||
},
|
||||
|
||||
shortMetric(name) {
|
||||
|
||||
77
webapp/static/js/metric_presenter.js
Normal file
77
webapp/static/js/metric_presenter.js
Normal file
@@ -0,0 +1,77 @@
|
||||
// metric_presenter.js — single source of truth for metric semantics
// (higher-is-better vs. lower-is-better), colour thresholds and short descriptions.

/**
 * Builds the MetricPresenter helpers and attaches them to the given global object.
 *
 * @param {object} globalObj - Object that receives the `MetricPresenter` property.
 */
(function attachMetricPresenter(globalObj) {
  // Per-metric metadata. `direction` decides which way the thresholds run;
  // `description` is the user-facing text returned by describeMetric().
  // Frozen because nothing in this module mutates the table after creation.
  const METRIC_META = Object.freeze({
    faithfulness: {
      direction: "higher_better",
      description: "回答是否被检索内容直接支持,越高越可靠。",
    },
    answer_relevancy: {
      direction: "higher_better",
      description: "回答与问题是否紧密相关,越高越切题。",
    },
    context_recall: {
      direction: "higher_better",
      description: "检索片段覆盖标准答案关键信息的程度,越高越完整。",
    },
    context_precision: {
      direction: "higher_better",
      description: "检索片段中有效信息的占比,越高越精准。",
    },
    noise_sensitivity: {
      direction: "lower_better",
      description: "对噪声上下文的敏感程度,越低说明抗干扰能力越强。",
    },
    factual_correctness: {
      direction: "higher_better",
      description: "回答与标准答案在事实层面的吻合程度,越高越准确。",
    },
    semantic_similarity: {
      direction: "higher_better",
      description: "回答与标准答案在语义上的相似程度,越高越接近。",
    },
  });

  /**
   * Tells whether a smaller value is the better outcome for a metric.
   * Metrics missing from METRIC_META are treated as higher-is-better.
   *
   * @param {string} metricName - Metric key.
   * @returns {boolean} True only when the metric is registered as "lower_better".
   */
  function isLowerBetter(metricName) {
    return METRIC_META[metricName]?.direction === "lower_better";
  }

  /**
   * Maps a metric value to a CSS class name.
   *
   * @param {string} metricName - Metric key, used to pick the threshold direction.
   * @param {*} value - Score; it is coerced with Number().
   * @returns {"na"|"good"|"warn"|"bad"} "na" for null, undefined or values that
   *   coerce to NaN. Otherwise good/warn/bad: cut-offs 0.85 / 0.65 for
   *   higher-is-better metrics, 0.15 / 0.35 for lower-is-better metrics.
   */
  function scoreClass(metricName, value) {
    if (value === null || value === undefined || Number.isNaN(Number(value))) return "na";
    const numeric = Number(value);
    if (isLowerBetter(metricName)) {
      if (numeric <= 0.15) return "good";
      if (numeric <= 0.35) return "warn";
      return "bad";
    }
    if (numeric >= 0.85) return "good";
    if (numeric >= 0.65) return "warn";
    return "bad";
  }

  /**
   * Returns the short description of a metric.
   *
   * @param {string} metricName - Metric key.
   * @returns {string} The registered description, or a generic fallback sentence
   *   when the metric is unknown or has no description.
   */
  function describeMetric(metricName) {
    return METRIC_META[metricName]?.description || "该指标用于衡量当前问答样本的评估表现。";
  }

  /**
   * Picks a bar colour for a histogram bin from the bin's lower bound.
   * The five-step scale runs green to red and is mirrored for lower-is-better metrics.
   *
   * @param {string} metricName - Metric key, used to pick the direction.
   * @param {*} lower - Lower bound of the bin; it is coerced with Number().
   * @returns {string} Hex colour string. A bound that coerces to NaN fails every
   *   comparison and therefore yields the red end of the scale.
   */
  function binColor(metricName, lower) {
    const numeric = Number(lower);
    if (isLowerBetter(metricName)) {
      if (numeric < 0.2) return "#16a34a";
      if (numeric < 0.4) return "#84cc16";
      if (numeric < 0.6) return "#eab308";
      if (numeric < 0.8) return "#f97316";
      return "#dc2626";
    }
    if (numeric >= 0.8) return "#16a34a";
    if (numeric >= 0.6) return "#84cc16";
    if (numeric >= 0.4) return "#eab308";
    if (numeric >= 0.2) return "#f97316";
    return "#dc2626";
  }

  globalObj.MetricPresenter = {
    scoreClass,
    describeMetric,
    binColor,
  };
  // globalThis is the same object as `window` in a browser page, but is also defined
  // in workers and Node, so loading this script there no longer throws a ReferenceError.
})(globalThis);
|
||||
@@ -117,13 +117,15 @@ const Report = {
|
||||
const metrics = report.metrics && report.metrics.length ? report.metrics : summary.metrics;
|
||||
metrics.forEach((metric) => {
|
||||
const value = report.metric_means ? report.metric_means[metric] : null;
|
||||
const cls = App.scoreClass(value);
|
||||
const cls = App.scoreClass(metric, value);
|
||||
const text = value === null || value === undefined ? "n/a" : value.toFixed(2);
|
||||
const description = MetricPresenter.describeMetric(metric);
|
||||
const card = document.createElement("div");
|
||||
card.className = "metric-card";
|
||||
card.innerHTML = `
|
||||
<div class="metric-value ${cls}">${text}</div>
|
||||
<div class="metric-name">${App.escape(metric)}</div>
|
||||
<div class="metric-desc">${App.escape(description)}</div>
|
||||
`;
|
||||
wrap.appendChild(card);
|
||||
});
|
||||
@@ -168,17 +170,13 @@ const Report = {
|
||||
const bins = distributions[metric] || [];
|
||||
const labels = bins.map((b) => b.label);
|
||||
const counts = bins.map((b) => b.count);
|
||||
const colors = bins.map((b) => Report._binColor(b.lower));
|
||||
const colors = bins.map((b) => Report._binColor(metric, b.lower));
|
||||
Report._drawDistChart(labels, counts, colors);
|
||||
},
|
||||
|
||||
// 低分箱偏红、高分箱偏绿,直观暴露长尾。
|
||||
_binColor(lower) {
|
||||
if (lower >= 0.8) return "#16a34a";
|
||||
if (lower >= 0.6) return "#84cc16";
|
||||
if (lower >= 0.4) return "#eab308";
|
||||
if (lower >= 0.2) return "#f97316";
|
||||
return "#dc2626";
|
||||
_binColor(metric, lower) {
|
||||
return MetricPresenter.binColor(metric, lower);
|
||||
},
|
||||
|
||||
// 实际绘制 Chart.js 柱状图。
|
||||
@@ -247,7 +245,7 @@ const Report = {
|
||||
body += `<tr><td>${App.escape(stat.key)}</td><td>${stat.count}</td>`;
|
||||
metrics.forEach((m) => {
|
||||
const v = stat.means ? stat.means[m] : null;
|
||||
const cls = App.scoreClass(v);
|
||||
const cls = App.scoreClass(m, v);
|
||||
const text = v === null || v === undefined ? "—" : v.toFixed(2);
|
||||
body += `<td class="${cls}">${text}</td>`;
|
||||
});
|
||||
@@ -271,7 +269,7 @@ const Report = {
|
||||
const scoreBadges = metrics
|
||||
.map((m) => {
|
||||
const v = sample.metrics ? sample.metrics[m] : null;
|
||||
const cls = App.scoreClass(v);
|
||||
const cls = App.scoreClass(m, v);
|
||||
const text = v === null || v === undefined ? "—" : v.toFixed(2);
|
||||
return `<span class="score-badge ${cls}" title="${App.escape(m)}">${text}</span>`;
|
||||
})
|
||||
|
||||
@@ -50,7 +50,7 @@ const ScoreJobs = {
|
||||
if (job.status === "completed") {
|
||||
scoreHtml = Object.entries(job.scores || {})
|
||||
.map(([k, v]) => {
|
||||
const cls = App.scoreClass(v);
|
||||
const cls = App.scoreClass(k, v);
|
||||
const text = v === null || v === undefined ? "n/a" : Number(v).toFixed(3);
|
||||
return `<span class="metric-chip" title="${App.escape(k)}">${App.escape(App.shortMetric(k))} <b class="${cls}">${text}</b></span>`;
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user