feat: add weight config panel to 新建评估 and weighted_score card to report
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -342,6 +342,34 @@ table.group-table td { border-bottom: 1px solid #f1f5f9; font-variant-numeric: t
|
|||||||
}
|
}
|
||||||
.report-switcher-select:focus { outline: none; border-color: var(--petrol); }
|
.report-switcher-select:focus { outline: none; border-color: var(--petrol); }
|
||||||
|
|
||||||
|
/* -- Weight config panel ------------------------------------------ */
|
||||||
|
.weight-config-panel { margin-top: 12px; }
|
||||||
|
.weight-section-title { font-size: 13px; font-weight: 600; color: var(--text); margin-bottom: 8px; }
|
||||||
|
.weight-rows { display: flex; flex-direction: column; gap: 6px; }
|
||||||
|
.weight-row {
|
||||||
|
display: flex; align-items: center; gap: 10px;
|
||||||
|
font-size: 13px;
|
||||||
|
}
|
||||||
|
.weight-row-label { min-width: 180px; color: var(--slate); font-family: monospace; }
|
||||||
|
.weight-row-input {
|
||||||
|
width: 80px; padding: 4px 8px; border: 1px solid var(--border);
|
||||||
|
border-radius: 6px; font-size: 13px; text-align: right;
|
||||||
|
}
|
||||||
|
.weight-row-input:focus { outline: none; border-color: #6366f1; }
|
||||||
|
.doc-weight-name {
|
||||||
|
flex: 1; padding: 4px 8px; border: 1px solid var(--border);
|
||||||
|
border-radius: 6px; font-size: 13px; min-width: 0;
|
||||||
|
}
|
||||||
|
.weight-row-remove { color: var(--bad); cursor: pointer; font-size: 14px; background: none; border: none; padding: 2px 6px; }
|
||||||
|
.weight-row-remove:hover { background: #fee2e2; border-radius: 4px; }
|
||||||
|
|
||||||
|
/* weighted_score summary card */
|
||||||
|
.metric-card.weighted-score-card {
|
||||||
|
border: 2px solid #6366f1;
|
||||||
|
background: #f5f3ff;
|
||||||
|
}
|
||||||
|
.metric-card.weighted-score-card .metric-name { color: #4f46e5; font-weight: 700; }
|
||||||
|
|
||||||
/* ================================================================
|
/* ================================================================
|
||||||
打印样式(导出 PDF 用)
|
打印样式(导出 PDF 用)
|
||||||
浏览器打印时隐藏 UI chrome,保留报告内容,图表 canvas 原样输出
|
浏览器打印时隐藏 UI chrome,保留报告内容,图表 canvas 原样输出
|
||||||
|
|||||||
@@ -92,6 +92,22 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Weight config panel -->
|
||||||
|
<div class="panel weight-config-panel" id="weight-config-panel" hidden>
|
||||||
|
<h2>???? <span class="muted" style="font-size:13px;font-weight:400">???????????????</span></h2>
|
||||||
|
|
||||||
|
<div class="weight-section">
|
||||||
|
<div class="weight-section-title">???? <span class="muted" style="font-size:12px">???????????????????</span></div>
|
||||||
|
<div id="metric-weight-rows" class="weight-rows"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="weight-section" style="margin-top:16px">
|
||||||
|
<div class="weight-section-title">???? <span class="muted" style="font-size:12px">?? PDF ???????????????????????</span></div>
|
||||||
|
<div id="doc-weight-rows" class="weight-rows"></div>
|
||||||
|
<button class="btn btn-sm" id="add-doc-weight-btn" style="margin-top:8px">? ??????</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="panel" id="task-panel" hidden>
|
<div class="panel" id="task-panel" hidden>
|
||||||
<div class="task-head">
|
<div class="task-head">
|
||||||
<h2>评估进度</h2>
|
<h2>评估进度</h2>
|
||||||
|
|||||||
@@ -127,6 +127,18 @@ const Report = {
|
|||||||
`;
|
`;
|
||||||
wrap.appendChild(card);
|
wrap.appendChild(card);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// 综合加权得分卡片
|
||||||
|
const wsValue = (report && report.weighted_score_mean !== undefined) ? report.weighted_score_mean : null;
|
||||||
|
const wsCard = document.createElement("div");
|
||||||
|
wsCard.className = "metric-card weighted-score-card";
|
||||||
|
const wsCls = App.scoreClass(wsValue);
|
||||||
|
const wsText = wsValue === null || wsValue === undefined ? "n/a" : wsValue.toFixed(2);
|
||||||
|
wsCard.innerHTML = `
|
||||||
|
<div class="metric-value ${wsCls}">${wsText}</div>
|
||||||
|
<div class="metric-name">综合加权得分</div>
|
||||||
|
`;
|
||||||
|
wrap.appendChild(wsCard);
|
||||||
},
|
},
|
||||||
|
|
||||||
// ② 分数分布直方图(可切换指标)。
|
// ② 分数分布直方图(可切换指标)。
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
// runner.js — 新建评估视图:列出场景、LLM角色配置、触发评估、轮询任务状态与日志。
|
// runner.js — 新建评估视图:列出场景、LLM角色配置、权重配置、触发评估、轮询任务状态。
|
||||||
|
|
||||||
const Runner = {
|
const Runner = {
|
||||||
selectedScenario: null,
|
selectedScenario: null,
|
||||||
|
selectedScenarioInfo: null,
|
||||||
pollTimer: null,
|
pollTimer: null,
|
||||||
lastRunId: null,
|
lastRunId: null,
|
||||||
|
|
||||||
// 绑定运行按钮。
|
|
||||||
init() {
|
init() {
|
||||||
document.getElementById("run-btn").addEventListener("click", () => Runner.trigger());
|
document.getElementById("run-btn").addEventListener("click", () => Runner.trigger());
|
||||||
document.getElementById("view-report-btn").addEventListener("click", () => {
|
document.getElementById("view-report-btn").addEventListener("click", () => {
|
||||||
@@ -14,9 +14,9 @@ const Runner = {
|
|||||||
App.navigate("report", Runner.lastRunId);
|
App.navigate("report", Runner.lastRunId);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
document.getElementById("add-doc-weight-btn").addEventListener("click", () => Runner._addDocWeightRow());
|
||||||
},
|
},
|
||||||
|
|
||||||
// 加载并渲染可触发的场景列表。
|
|
||||||
async loadScenarios() {
|
async loadScenarios() {
|
||||||
const list = document.getElementById("scenario-list");
|
const list = document.getElementById("scenario-list");
|
||||||
list.innerHTML = '<p class="muted">加载中…</p>';
|
list.innerHTML = '<p class="muted">加载中…</p>';
|
||||||
@@ -32,17 +32,14 @@ const Runner = {
|
|||||||
} catch (err) {
|
} catch (err) {
|
||||||
list.innerHTML = `<p class="muted">加载失败:${App.escape(err.message)}</p>`;
|
list.innerHTML = `<p class="muted">加载失败:${App.escape(err.message)}</p>`;
|
||||||
}
|
}
|
||||||
// 同时加载 profiles 供角色选择
|
|
||||||
Runner._populateProfileSelects();
|
Runner._populateProfileSelects();
|
||||||
},
|
},
|
||||||
|
|
||||||
// 填充三个角色下拉框
|
|
||||||
async _populateProfileSelects() {
|
async _populateProfileSelects() {
|
||||||
const cached = Profiles.getAll();
|
const cached = Profiles.getAll();
|
||||||
const profiles = cached.length > 0
|
const profiles = cached.length > 0
|
||||||
? cached
|
? cached
|
||||||
: (await API.profiles().catch(() => ({ profiles: [] }))).profiles;
|
: (await API.profiles().catch(() => ({ profiles: [] }))).profiles;
|
||||||
|
|
||||||
["role-judge", "role-answer", "role-dataset"].forEach(id => {
|
["role-judge", "role-answer", "role-dataset"].forEach(id => {
|
||||||
const sel = document.getElementById(id);
|
const sel = document.getElementById(id);
|
||||||
sel.innerHTML = '<option value="">— 使用场景原始配置 —</option>';
|
sel.innerHTML = '<option value="">— 使用场景原始配置 —</option>';
|
||||||
@@ -55,17 +52,14 @@ const Runner = {
|
|||||||
});
|
});
|
||||||
},
|
},
|
||||||
|
|
||||||
// 构造单个场景条目。
|
|
||||||
renderScenarioItem(sc) {
|
renderScenarioItem(sc) {
|
||||||
const item = document.createElement("div");
|
const item = document.createElement("div");
|
||||||
const invalid = !!sc.error;
|
const invalid = !!sc.error;
|
||||||
item.className = "scenario-item" + (invalid ? " invalid" : "");
|
item.className = "scenario-item" + (invalid ? " invalid" : "");
|
||||||
|
|
||||||
const modeTag = sc.mode
|
const modeTag = sc.mode
|
||||||
? `<span class="tag mode-${App.escape(sc.mode)}">${App.escape(sc.mode)}</span>`
|
? `<span class="tag mode-${App.escape(sc.mode)}">${App.escape(sc.mode)}</span>`
|
||||||
: "";
|
: "";
|
||||||
const metricCount = (sc.metrics || []).length;
|
const metricCount = (sc.metrics || []).length;
|
||||||
|
|
||||||
item.innerHTML = `
|
item.innerHTML = `
|
||||||
<div>
|
<div>
|
||||||
<div class="scenario-name">${App.escape(sc.scenario_name || sc.path)}</div>
|
<div class="scenario-name">${App.escape(sc.scenario_name || sc.path)}</div>
|
||||||
@@ -77,27 +71,94 @@ const Runner = {
|
|||||||
<span class="tag">${metricCount} 指标</span>
|
<span class="tag">${metricCount} 指标</span>
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
|
|
||||||
if (!invalid) {
|
if (!invalid) {
|
||||||
item.addEventListener("click", () => {
|
item.addEventListener("click", () => {
|
||||||
document.querySelectorAll(".scenario-item").forEach((el) => el.classList.remove("selected"));
|
document.querySelectorAll(".scenario-item").forEach((el) => el.classList.remove("selected"));
|
||||||
item.classList.add("selected");
|
item.classList.add("selected");
|
||||||
Runner.selectedScenario = sc.path;
|
Runner.selectedScenario = sc.path;
|
||||||
|
Runner.selectedScenarioInfo = sc;
|
||||||
document.getElementById("selected-scenario").textContent = sc.path;
|
document.getElementById("selected-scenario").textContent = sc.path;
|
||||||
document.getElementById("run-btn").disabled = false;
|
document.getElementById("run-btn").disabled = false;
|
||||||
// 显示 LLM 角色面板
|
|
||||||
document.getElementById("llm-assignment-panel").hidden = false;
|
document.getElementById("llm-assignment-panel").hidden = false;
|
||||||
|
Runner._renderWeightPanel(sc);
|
||||||
|
document.getElementById("weight-config-panel").hidden = false;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
return item;
|
return item;
|
||||||
},
|
},
|
||||||
|
|
||||||
// 触发评估:先 apply profiles(若选了),再触发任务。
|
// 根据选中场景渲染指标权重行(动态生成,按场景 metrics 列表)
|
||||||
|
_renderWeightPanel(sc) {
|
||||||
|
const metricRows = document.getElementById("metric-weight-rows");
|
||||||
|
metricRows.innerHTML = "";
|
||||||
|
const metrics = sc.metrics || [];
|
||||||
|
const existingWeights = sc.metric_weights || {};
|
||||||
|
metrics.forEach(metric => {
|
||||||
|
const row = document.createElement("div");
|
||||||
|
row.className = "weight-row";
|
||||||
|
const currentVal = existingWeights[metric] != null ? existingWeights[metric] : 1.0;
|
||||||
|
row.innerHTML = `
|
||||||
|
<span class="weight-row-label">${App.escape(metric)}</span>
|
||||||
|
<input class="weight-row-input" type="number" min="0" step="0.1"
|
||||||
|
data-metric="${App.escape(metric)}" value="${currentVal}" />
|
||||||
|
`;
|
||||||
|
metricRows.appendChild(row);
|
||||||
|
});
|
||||||
|
|
||||||
|
// 填充已有文档权重
|
||||||
|
const docRows = document.getElementById("doc-weight-rows");
|
||||||
|
docRows.innerHTML = "";
|
||||||
|
const existingDocWeights = sc.doc_weights || {};
|
||||||
|
Object.entries(existingDocWeights).forEach(([docName, w]) => {
|
||||||
|
Runner._addDocWeightRow(docName, w);
|
||||||
|
});
|
||||||
|
},
|
||||||
|
|
||||||
|
// 添加一行文档权重输入
|
||||||
|
_addDocWeightRow(docName, weight) {
|
||||||
|
const name = docName !== undefined ? docName : "";
|
||||||
|
const w = weight !== undefined ? weight : 1.0;
|
||||||
|
const container = document.getElementById("doc-weight-rows");
|
||||||
|
const row = document.createElement("div");
|
||||||
|
row.className = "weight-row";
|
||||||
|
row.innerHTML = `
|
||||||
|
<input class="doc-weight-name" type="text" placeholder="PDF 文件名(如 322_双源CT.pdf)" value="${App.escape(String(name))}" />
|
||||||
|
<input class="weight-row-input" type="number" min="0" step="0.1" value="${w}" />
|
||||||
|
<button class="weight-row-remove" title="删除">✕</button>
|
||||||
|
`;
|
||||||
|
row.querySelector(".weight-row-remove").addEventListener("click", () => row.remove());
|
||||||
|
container.appendChild(row);
|
||||||
|
},
|
||||||
|
|
||||||
|
// 收集权重面板当前值;全等权时返回 null(不发送)
|
||||||
|
_collectWeights() {
|
||||||
|
const metricWeights = {};
|
||||||
|
document.querySelectorAll("#metric-weight-rows .weight-row-input").forEach(input => {
|
||||||
|
const metric = input.dataset.metric;
|
||||||
|
const val = parseFloat(input.value);
|
||||||
|
if (metric && !isNaN(val)) metricWeights[metric] = val;
|
||||||
|
});
|
||||||
|
|
||||||
|
const docWeights = {};
|
||||||
|
document.querySelectorAll("#doc-weight-rows .weight-row").forEach(row => {
|
||||||
|
const nameInput = row.querySelector(".doc-weight-name");
|
||||||
|
const valInput = row.querySelector(".weight-row-input");
|
||||||
|
if (!nameInput || !valInput) return;
|
||||||
|
const name = nameInput.value.trim();
|
||||||
|
const val = parseFloat(valInput.value);
|
||||||
|
if (name && !isNaN(val)) docWeights[name] = val;
|
||||||
|
});
|
||||||
|
|
||||||
|
const allMetricDefault = Object.values(metricWeights).every(v => Math.abs(v - 1.0) < 1e-9);
|
||||||
|
const noDocWeights = Object.keys(docWeights).length === 0;
|
||||||
|
if (allMetricDefault && noDocWeights) return { metricWeights: null, docWeights: null };
|
||||||
|
return { metricWeights, docWeights };
|
||||||
|
},
|
||||||
|
|
||||||
async trigger() {
|
async trigger() {
|
||||||
if (!Runner.selectedScenario) return;
|
if (!Runner.selectedScenario) return;
|
||||||
const runBtn = document.getElementById("run-btn");
|
const runBtn = document.getElementById("run-btn");
|
||||||
runBtn.disabled = true;
|
runBtn.disabled = true;
|
||||||
|
|
||||||
const panel = document.getElementById("task-panel");
|
const panel = document.getElementById("task-panel");
|
||||||
const logBox = document.getElementById("task-log");
|
const logBox = document.getElementById("task-log");
|
||||||
const statusBadge = document.getElementById("task-status");
|
const statusBadge = document.getElementById("task-status");
|
||||||
@@ -106,12 +167,8 @@ const Runner = {
|
|||||||
reportBtn.hidden = true;
|
reportBtn.hidden = true;
|
||||||
logBox.textContent = "";
|
logBox.textContent = "";
|
||||||
Runner._setStatus(statusBadge, "queued");
|
Runner._setStatus(statusBadge, "queued");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Step 1: apply LLM profiles to YAML if any selected
|
|
||||||
await Runner._applyProfilesIfNeeded(logBox);
|
await Runner._applyProfilesIfNeeded(logBox);
|
||||||
|
|
||||||
// Step 2: trigger evaluation
|
|
||||||
const resp = await API.triggerEvaluation(Runner.selectedScenario);
|
const resp = await API.triggerEvaluation(Runner.selectedScenario);
|
||||||
Runner.poll(resp.task_id);
|
Runner.poll(resp.task_id);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@@ -121,20 +178,22 @@ const Runner = {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
// 如果用户选了 profile,就先 apply 写回 YAML
|
|
||||||
async _applyProfilesIfNeeded(logBox) {
|
async _applyProfilesIfNeeded(logBox) {
|
||||||
const judgeId = document.getElementById("role-judge").value;
|
const judgeId = document.getElementById("role-judge").value;
|
||||||
const answerId = document.getElementById("role-answer").value;
|
const answerId = document.getElementById("role-answer").value;
|
||||||
const datasetId = document.getElementById("role-dataset").value;
|
const datasetId = document.getElementById("role-dataset").value;
|
||||||
|
const { metricWeights, docWeights } = Runner._collectWeights();
|
||||||
|
|
||||||
if (!judgeId && !answerId && !datasetId) return; // 全空,跳过
|
if (!judgeId && !answerId && !datasetId && !metricWeights && !docWeights) return;
|
||||||
|
|
||||||
logBox.textContent = "正在将 LLM 配置写入场景文件…\n";
|
logBox.textContent = "正在将 LLM 配置和权重写入场景文件…\n";
|
||||||
const body = {
|
const body = {
|
||||||
scenario_path: Runner.selectedScenario,
|
scenario_path: Runner.selectedScenario,
|
||||||
judge_profile_id: judgeId || null,
|
judge_profile_id: judgeId || null,
|
||||||
answer_profile_id: answerId || null,
|
answer_profile_id: answerId || null,
|
||||||
dataset_profile_id: datasetId || null,
|
dataset_profile_id: datasetId || null,
|
||||||
|
metric_weights: metricWeights,
|
||||||
|
doc_weights: docWeights,
|
||||||
};
|
};
|
||||||
const result = await API.applyProfiles(body);
|
const result = await API.applyProfiles(body);
|
||||||
const fields = (result.patched_fields || []).join(", ");
|
const fields = (result.patched_fields || []).join(", ");
|
||||||
@@ -143,13 +202,11 @@ const Runner = {
|
|||||||
: "(未找到可更新的字段,继续运行)\n";
|
: "(未找到可更新的字段,继续运行)\n";
|
||||||
},
|
},
|
||||||
|
|
||||||
// 周期性轮询任务状态,刷新日志与徽标。
|
|
||||||
poll(taskId) {
|
poll(taskId) {
|
||||||
const logBox = document.getElementById("task-log");
|
const logBox = document.getElementById("task-log");
|
||||||
const statusBadge = document.getElementById("task-status");
|
const statusBadge = document.getElementById("task-status");
|
||||||
const reportBtn = document.getElementById("view-report-btn");
|
const reportBtn = document.getElementById("view-report-btn");
|
||||||
const runBtn = document.getElementById("run-btn");
|
const runBtn = document.getElementById("run-btn");
|
||||||
|
|
||||||
if (Runner.pollTimer) clearInterval(Runner.pollTimer);
|
if (Runner.pollTimer) clearInterval(Runner.pollTimer);
|
||||||
Runner.pollTimer = setInterval(async () => {
|
Runner.pollTimer = setInterval(async () => {
|
||||||
try {
|
try {
|
||||||
@@ -157,7 +214,6 @@ const Runner = {
|
|||||||
logBox.textContent = (status.logs || []).join("\n");
|
logBox.textContent = (status.logs || []).join("\n");
|
||||||
logBox.scrollTop = logBox.scrollHeight;
|
logBox.scrollTop = logBox.scrollHeight;
|
||||||
Runner._setStatus(statusBadge, status.status);
|
Runner._setStatus(statusBadge, status.status);
|
||||||
|
|
||||||
if (status.status === "completed" || status.status === "failed") {
|
if (status.status === "completed" || status.status === "failed") {
|
||||||
clearInterval(Runner.pollTimer);
|
clearInterval(Runner.pollTimer);
|
||||||
runBtn.disabled = false;
|
runBtn.disabled = false;
|
||||||
@@ -175,7 +231,6 @@ const Runner = {
|
|||||||
}, 1200);
|
}, 1200);
|
||||||
},
|
},
|
||||||
|
|
||||||
// 更新状态徽标的文本与配色类。
|
|
||||||
_setStatus(badge, status) {
|
_setStatus(badge, status) {
|
||||||
badge.textContent = status;
|
badge.textContent = status;
|
||||||
badge.className = "badge " + status;
|
badge.className = "badge " + status;
|
||||||
|
|||||||
Reference in New Issue
Block a user