评估进度
diff --git a/webapp/static/js/report.js b/webapp/static/js/report.js
index b4f06ad..abb7320 100644
--- a/webapp/static/js/report.js
+++ b/webapp/static/js/report.js
@@ -127,6 +127,18 @@ const Report = {
`;
wrap.appendChild(card);
});
+
+ // 综合加权得分卡片
+ const wsValue = (report && report.weighted_score_mean !== undefined) ? report.weighted_score_mean : null;
+ const wsCard = document.createElement("div");
+ wsCard.className = "metric-card weighted-score-card";
+ const wsCls = App.scoreClass(wsValue);
+ const wsText = wsValue === null || wsValue === undefined ? "n/a" : wsValue.toFixed(2);
+ wsCard.innerHTML = `
+
${wsText}
+
综合加权得分
+ `;
+ wrap.appendChild(wsCard);
},
// ② 分数分布直方图(可切换指标)。
diff --git a/webapp/static/js/runner.js b/webapp/static/js/runner.js
index a65a156..1f23817 100644
--- a/webapp/static/js/runner.js
+++ b/webapp/static/js/runner.js
@@ -1,11 +1,11 @@
-// runner.js — 新建评估视图:列出场景、LLM角色配置、触发评估、轮询任务状态与日志。
+// runner.js — New-evaluation view: lists scenarios, configures LLM roles and weights, triggers evaluations, and polls task status and logs.
const Runner = {
selectedScenario: null,
+ selectedScenarioInfo: null,
pollTimer: null,
lastRunId: null,
- // 绑定运行按钮。
init() {
document.getElementById("run-btn").addEventListener("click", () => Runner.trigger());
document.getElementById("view-report-btn").addEventListener("click", () => {
@@ -14,9 +14,9 @@ const Runner = {
App.navigate("report", Runner.lastRunId);
}
});
+ document.getElementById("add-doc-weight-btn").addEventListener("click", () => Runner._addDocWeightRow());
},
- // 加载并渲染可触发的场景列表。
async loadScenarios() {
const list = document.getElementById("scenario-list");
list.innerHTML = '
加载中…
';
@@ -32,17 +32,14 @@ const Runner = {
} catch (err) {
list.innerHTML = `
加载失败:${App.escape(err.message)}
`;
}
- // 同时加载 profiles 供角色选择
Runner._populateProfileSelects();
},
- // 填充三个角色下拉框
async _populateProfileSelects() {
const cached = Profiles.getAll();
const profiles = cached.length > 0
? cached
: (await API.profiles().catch(() => ({ profiles: [] }))).profiles;
-
["role-judge", "role-answer", "role-dataset"].forEach(id => {
const sel = document.getElementById(id);
sel.innerHTML = '
';
@@ -55,17 +52,14 @@ const Runner = {
});
},
- // 构造单个场景条目。
renderScenarioItem(sc) {
const item = document.createElement("div");
const invalid = !!sc.error;
item.className = "scenario-item" + (invalid ? " invalid" : "");
-
const modeTag = sc.mode
? `
${App.escape(sc.mode)}`
: "";
const metricCount = (sc.metrics || []).length;
-
item.innerHTML = `
${App.escape(sc.scenario_name || sc.path)}
@@ -77,27 +71,94 @@ const Runner = {
${metricCount} 指标
`;
-
if (!invalid) {
item.addEventListener("click", () => {
document.querySelectorAll(".scenario-item").forEach((el) => el.classList.remove("selected"));
item.classList.add("selected");
Runner.selectedScenario = sc.path;
+ Runner.selectedScenarioInfo = sc;
document.getElementById("selected-scenario").textContent = sc.path;
document.getElementById("run-btn").disabled = false;
- // 显示 LLM 角色面板
document.getElementById("llm-assignment-panel").hidden = false;
+ Runner._renderWeightPanel(sc);
+ document.getElementById("weight-config-panel").hidden = false;
});
}
return item;
},
- // 触发评估:先 apply profiles(若选了),再触发任务。
+ // Render one weight row per metric of the selected scenario `sc`, then rebuild the document-weight rows from `sc.doc_weights` (rows are generated dynamically from the scenario's metrics list).
+ _renderWeightPanel(sc) {
+ const metricRows = document.getElementById("metric-weight-rows");
+ metricRows.innerHTML = ""; // clear existing rows before re-rendering
+ const metrics = sc.metrics || [];
+ const existingWeights = sc.metric_weights || {};
+ metrics.forEach(metric => {
+ const row = document.createElement("div");
+ row.className = "weight-row";
+ const currentVal = existingWeights[metric] != null ? existingWeights[metric] : 1.0; // null/undefined weight falls back to 1.0
+ row.innerHTML = `
+
${App.escape(metric)}
+
+ `;
+ metricRows.appendChild(row);
+ });
+
+ // Re-create one document-weight row for each entry already present in sc.doc_weights.
+ const docRows = document.getElementById("doc-weight-rows");
+ docRows.innerHTML = "";
+ const existingDocWeights = sc.doc_weights || {};
+ Object.entries(existingDocWeights).forEach(([docName, w]) => {
+ Runner._addDocWeightRow(docName, w);
+ });
+ },
+
+ // Append one document-weight input row to #doc-weight-rows; `docName` defaults to "" and `weight` to 1.0 when they are undefined.
+ _addDocWeightRow(docName, weight) {
+ const name = docName !== undefined ? docName : "";
+ const w = weight !== undefined ? weight : 1.0;
+ const container = document.getElementById("doc-weight-rows");
+ const row = document.createElement("div");
+ row.className = "weight-row";
+ row.innerHTML = `
+
+
+
+ `;
+ row.querySelector(".weight-row-remove").addEventListener("click", () => row.remove()); // NOTE(review): relies on the row markup containing a .weight-row-remove element — markup not visible here, confirm
+ container.appendChild(row);
+ },
+
+ // Collect the values currently entered in the weight panel. Returns { metricWeights, docWeights }; both fields are null when every metric weight equals 1.0 and no document weight is set (nothing to send).
+ _collectWeights() {
+ const metricWeights = {};
+ document.querySelectorAll("#metric-weight-rows .weight-row-input").forEach(input => {
+ const metric = input.dataset.metric;
+ const val = parseFloat(input.value);
+ if (metric && !isNaN(val)) metricWeights[metric] = val; // skip inputs lacking a data-metric key or a numeric value
+ });
+
+ const docWeights = {};
+ document.querySelectorAll("#doc-weight-rows .weight-row").forEach(row => {
+ const nameInput = row.querySelector(".doc-weight-name");
+ const valInput = row.querySelector(".weight-row-input");
+ if (!nameInput || !valInput) return;
+ const name = nameInput.value.trim();
+ const val = parseFloat(valInput.value);
+ if (name && !isNaN(val)) docWeights[name] = val; // skip rows with a blank name or a non-numeric weight
+ });
+
+ const allMetricDefault = Object.values(metricWeights).every(v => Math.abs(v - 1.0) < 1e-9); // tolerance comparison against the default weight 1.0
+ const noDocWeights = Object.keys(docWeights).length === 0;
+ if (allMetricDefault && noDocWeights) return { metricWeights: null, docWeights: null };
+ return { metricWeights, docWeights };
+ },
+
async trigger() {
if (!Runner.selectedScenario) return;
const runBtn = document.getElementById("run-btn");
runBtn.disabled = true;
-
const panel = document.getElementById("task-panel");
const logBox = document.getElementById("task-log");
const statusBadge = document.getElementById("task-status");
@@ -106,12 +167,8 @@ const Runner = {
reportBtn.hidden = true;
logBox.textContent = "";
Runner._setStatus(statusBadge, "queued");
-
try {
- // Step 1: apply LLM profiles to YAML if any selected
await Runner._applyProfilesIfNeeded(logBox);
-
- // Step 2: trigger evaluation
const resp = await API.triggerEvaluation(Runner.selectedScenario);
Runner.poll(resp.task_id);
} catch (err) {
@@ -121,20 +178,22 @@ const Runner = {
}
},
- // 如果用户选了 profile,就先 apply 写回 YAML
async _applyProfilesIfNeeded(logBox) {
const judgeId = document.getElementById("role-judge").value;
const answerId = document.getElementById("role-answer").value;
const datasetId = document.getElementById("role-dataset").value;
+ const { metricWeights, docWeights } = Runner._collectWeights();
- if (!judgeId && !answerId && !datasetId) return; // 全空,跳过
+ if (!judgeId && !answerId && !datasetId && !metricWeights && !docWeights) return;
- logBox.textContent = "正在将 LLM 配置写入场景文件…\n";
+ logBox.textContent = "正在将 LLM 配置和权重写入场景文件…\n";
const body = {
scenario_path: Runner.selectedScenario,
judge_profile_id: judgeId || null,
answer_profile_id: answerId || null,
dataset_profile_id: datasetId || null,
+ metric_weights: metricWeights,
+ doc_weights: docWeights,
};
const result = await API.applyProfiles(body);
const fields = (result.patched_fields || []).join(", ");
@@ -143,13 +202,11 @@ const Runner = {
: "(未找到可更新的字段,继续运行)\n";
},
- // 周期性轮询任务状态,刷新日志与徽标。
poll(taskId) {
const logBox = document.getElementById("task-log");
const statusBadge = document.getElementById("task-status");
const reportBtn = document.getElementById("view-report-btn");
const runBtn = document.getElementById("run-btn");
-
if (Runner.pollTimer) clearInterval(Runner.pollTimer);
Runner.pollTimer = setInterval(async () => {
try {
@@ -157,7 +214,6 @@ const Runner = {
logBox.textContent = (status.logs || []).join("\n");
logBox.scrollTop = logBox.scrollHeight;
Runner._setStatus(statusBadge, status.status);
-
if (status.status === "completed" || status.status === "failed") {
clearInterval(Runner.pollTimer);
runBtn.disabled = false;
@@ -175,7 +231,6 @@ const Runner = {
}, 1200);
},
- // 更新状态徽标的文本与配色类。
_setStatus(badge, status) {
badge.textContent = status;
badge.className = "badge " + status;