Add RAGAS evaluation web console (FastAPI + vanilla JS)

- webapp/: FastAPI backend with runs/scenarios/evaluations API routers;
  services for run_reader, report_builder, scenario_scanner, task_manager
  (lazy ragas import — server boots even without ragas); Pydantic models
- webapp/static/: single-page console (layout A: left-nav + main area);
  report detail with metric cards, Chart.js distribution histogram,
  grouping table, lowest-score sample review; trigger evaluation + log polling
- webmain.py: uvicorn entry point (alongside existing main.py CLI)
- start.bat: Windows one-click launcher with env checks and auto-browser open
- rag_eval/datasets/: implement missing loader + normalizer modules
  (load_dataset_records, normalize_records) required by evaluator
- scripts/seed_sample_run.py: generate realistic demo run artifacts
- .gitignore: exclude datasets/ data files but keep rag_eval/datasets/ source

Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 15:53:57 +08:00
parent 9cbdc1d95d
commit e89695e490
26 changed files with 2496 additions and 2 deletions

46
webapp/static/js/api.js Normal file
View File

@@ -0,0 +1,46 @@
// api.js — 控制台后端 HTTP 接口的轻量封装。
const API = {
// 通用 JSON GET失败时抛出带状态码的错误。
async get(path) {
const resp = await fetch(path);
if (!resp.ok) {
const detail = await API._extractError(resp);
throw new Error(detail);
}
return resp.json();
},
// 通用 JSON POST。
async post(path, body) {
const resp = await fetch(path, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(body || {}),
});
if (!resp.ok) {
const detail = await API._extractError(resp);
throw new Error(detail);
}
return resp.json();
},
// 从错误响应中尽量解析出 detail 文本。
async _extractError(resp) {
try {
const data = await resp.json();
return data.detail || `请求失败 (${resp.status})`;
} catch (_e) {
return `请求失败 (${resp.status})`;
}
},
health() { return API.get("/api/health"); },
runs() { return API.get("/api/runs"); },
runDetail(runId) { return API.get(`/api/runs/${encodeURIComponent(runId)}`); },
scenarios() { return API.get("/api/scenarios"); },
triggerEvaluation(scenarioPath) {
return API.post("/api/evaluations", { scenario_path: scenarioPath });
},
taskStatus(taskId) { return API.get(`/api/evaluations/${encodeURIComponent(taskId)}`); },
};

152
webapp/static/js/app.js Normal file
View File

@@ -0,0 +1,152 @@
// app.js — 视图路由、运行列表渲染、健康检查。整个控制台的入口编排。
const App = {
currentRunId: null,
views: ["runs", "new", "report"],
titles: { runs: "运行列表", new: "新建评估", report: "报告详情" },
// 初始化:绑定导航、加载首屏、启动健康检查。
init() {
document.querySelectorAll(".nav-item").forEach((btn) => {
btn.addEventListener("click", () => App.switchView(btn.dataset.view));
});
document.getElementById("refresh-btn").addEventListener("click", () => App.refreshCurrent());
Runner.init();
App.switchView("runs");
App.checkHealth();
setInterval(App.checkHealth, 15000);
},
// 切换主视图,并同步导航高亮与标题。
switchView(view) {
if (view === "report" && !App.currentRunId) {
// 没有选中的运行时,报告页显示占位。
}
App.views.forEach((name) => {
const el = document.getElementById(`view-${name}`);
if (el) el.hidden = name !== view;
});
document.querySelectorAll(".nav-item").forEach((btn) => {
btn.classList.toggle("active", btn.dataset.view === view);
});
document.getElementById("view-title").textContent = App.titles[view] || view;
App.activeView = view;
if (view === "runs") App.loadRuns();
if (view === "new") Runner.loadScenarios();
if (view === "report") Report.render(App.currentRunId);
},
// 刷新当前视图的数据。
refreshCurrent() {
App.switchView(App.activeView || "runs");
},
// 加载并渲染运行列表。
async loadRuns() {
const container = document.getElementById("runs-container");
const empty = document.getElementById("runs-empty");
container.innerHTML = '<p class="muted">加载中…</p>';
try {
const data = await API.runs();
const runs = data.runs || [];
if (runs.length === 0) {
container.innerHTML = "";
empty.hidden = false;
return;
}
empty.hidden = true;
container.innerHTML = "";
runs.forEach((run) => container.appendChild(App.renderRunCard(run)));
} catch (err) {
container.innerHTML = `<p class="muted">加载失败:${App.escape(err.message)}</p>`;
}
},
// 构造一张运行卡片。
renderRunCard(run) {
const card = document.createElement("div");
card.className = "run-card";
card.addEventListener("click", () => {
App.currentRunId = run.run_id;
App.enableReportNav();
App.switchView("report");
});
const chips = (run.metrics || [])
.map((m) => {
const val = run.metric_means ? run.metric_means[m] : null;
const cls = App.scoreClass(val);
const text = val === null || val === undefined ? "n/a" : val.toFixed(2);
return `<span class="metric-chip">${App.escape(App.shortMetric(m))} <b class="${cls}">${text}</b></span>`;
})
.join("");
card.innerHTML = `
<div class="run-card-head">
<div class="run-card-title">${App.escape(run.scenario_name || run.run_id)}</div>
</div>
<div class="run-card-meta">
<div>${App.escape(run.mode || "—")} · judge: ${App.escape(run.judge_model || "—")}</div>
<div>${run.valid_samples} 有效 / ${run.invalid_samples} 无效 · ${App.escape(App.shortTime(run.finished_at))}</div>
</div>
<div class="run-card-metrics">${chips}</div>
`;
return card;
},
// 启用报告导航项(选中运行后)。
enableReportNav() {
const btn = document.querySelector('.nav-item[data-view="report"]');
if (btn) btn.disabled = false;
},
// 根据分值返回 good/warn/bad/na 配色类。
scoreClass(value) {
if (value === null || value === undefined) return "na";
if (value >= 0.8) return "good";
if (value >= 0.65) return "warn";
return "bad";
},
// 指标名缩写,节省卡片横向空间。
shortMetric(name) {
const map = {
faithfulness: "faith.",
answer_relevancy: "ans.rel.",
context_recall: "ctx.recall",
context_precision: "ctx.prec.",
};
return map[name] || name;
},
// 截取时间戳到分钟,便于阅读。
shortTime(iso) {
if (!iso) return "—";
return String(iso).replace("T", " ").slice(0, 16);
},
// 简单 HTML 转义,防止注入。
escape(text) {
const div = document.createElement("div");
div.textContent = text == null ? "" : String(text);
return div.innerHTML;
},
// 健康检查,更新左下角状态点。
async checkHealth() {
const dot = document.getElementById("health-dot");
const label = document.getElementById("health-text");
try {
await API.health();
dot.className = "dot ok";
label.textContent = "服务正常";
} catch (_e) {
dot.className = "dot bad";
label.textContent = "服务离线";
}
},
};
document.addEventListener("DOMContentLoaded", App.init);

258
webapp/static/js/report.js Normal file
View File

@@ -0,0 +1,258 @@
// report.js — 报告详情页渲染:元信息、指标卡片、分布图、分组表、低分样本复核。
const Report = {
distChart: null,
currentDetail: null,
activeGrouping: null,
// 加载并渲染指定运行的完整报告。
async render(runId) {
const empty = document.getElementById("report-empty");
const content = document.getElementById("report-content");
if (!runId) {
empty.hidden = false;
content.hidden = true;
return;
}
empty.hidden = true;
content.hidden = false;
content.style.opacity = "0.4";
try {
const detail = await API.runDetail(runId);
Report.currentDetail = detail;
Report.renderMeta(detail.summary);
Report.renderMetricCards(detail.summary, detail.report);
Report.renderDistribution(detail.report);
Report.renderGroupings(detail.report);
Report.renderLowest(detail.report);
content.style.opacity = "1";
} catch (err) {
empty.hidden = false;
content.hidden = true;
empty.innerHTML = `<p>加载报告失败:${App.escape(err.message)}</p>`;
}
},
// 顶部元信息条。
renderMeta(summary) {
const el = document.getElementById("report-meta");
el.innerHTML = `
<div>
<div class="report-meta-title">${App.escape(summary.scenario_name || summary.run_id)}
<span class="status-pill completed">● completed</span></div>
<div class="report-meta-info">run_id: ${App.escape(summary.run_id)}</div>
</div>
<div class="report-meta-info">
${App.escape(summary.mode || "—")} · judge: ${App.escape(summary.judge_model || "—")}
· ${summary.total_samples} 样本 (${summary.valid_samples} 有效 / ${summary.invalid_samples} 无效)
· ${App.escape(App.shortTime(summary.finished_at))}
</div>
`;
},
// ① 指标均值卡片。
renderMetricCards(summary, report) {
const wrap = document.getElementById("metric-cards");
wrap.innerHTML = "";
const metrics = report.metrics && report.metrics.length ? report.metrics : summary.metrics;
metrics.forEach((metric) => {
const value = report.metric_means ? report.metric_means[metric] : null;
const cls = App.scoreClass(value);
const text = value === null || value === undefined ? "n/a" : value.toFixed(2);
const card = document.createElement("div");
card.className = "metric-card";
card.innerHTML = `
<div class="metric-value ${cls}">${text}</div>
<div class="metric-name">${App.escape(metric)}</div>
`;
wrap.appendChild(card);
});
},
// ② 分数分布直方图(可切换指标)。
renderDistribution(report) {
const select = document.getElementById("dist-metric-select");
const distributions = report.distributions || {};
const metricsWithDist = Object.keys(distributions);
select.innerHTML = "";
if (metricsWithDist.length === 0) {
Report._drawDistChart([], []);
return;
}
metricsWithDist.forEach((metric) => {
const opt = document.createElement("option");
opt.value = metric;
opt.textContent = metric;
select.appendChild(opt);
});
select.onchange = () => Report._updateDistChart(select.value);
Report._updateDistChart(metricsWithDist[0]);
},
// 用选定指标的分箱数据刷新直方图。
_updateDistChart(metric) {
const distributions = Report.currentDetail.report.distributions || {};
const bins = distributions[metric] || [];
const labels = bins.map((b) => b.label);
const counts = bins.map((b) => b.count);
const colors = bins.map((b) => Report._binColor(b.lower));
Report._drawDistChart(labels, counts, colors);
},
// 低分箱偏红、高分箱偏绿,直观暴露长尾。
_binColor(lower) {
if (lower >= 0.8) return "#16a34a";
if (lower >= 0.6) return "#84cc16";
if (lower >= 0.4) return "#eab308";
if (lower >= 0.2) return "#f97316";
return "#dc2626";
},
// 实际绘制 Chart.js 柱状图。
_drawDistChart(labels, counts, colors) {
const canvas = document.getElementById("dist-chart");
if (Report.distChart) Report.distChart.destroy();
Report.distChart = new Chart(canvas, {
type: "bar",
data: {
labels,
datasets: [{ data: counts, backgroundColor: colors || "#009999", borderRadius: 4 }],
},
options: {
responsive: true,
plugins: { legend: { display: false } },
scales: {
y: { beginAtZero: true, ticks: { precision: 0 }, grid: { color: "#f1f5f9" } },
x: { grid: { display: false } },
},
},
});
},
// ③ 分组均值difficulty / question_type / language
renderGroupings(report) {
const tabsEl = document.getElementById("grouping-tabs");
const tableEl = document.getElementById("grouping-table");
const groupings = report.groupings || {};
const fields = Object.keys(groupings);
tabsEl.innerHTML = "";
if (fields.length === 0) {
tableEl.innerHTML = '<p class="muted tiny">数据集未包含可分组字段difficulty / question_type。</p>';
return;
}
const fieldLabels = { difficulty: "难度", question_type: "类型", language: "语言" };
Report.activeGrouping = fields[0];
fields.forEach((field) => {
const tab = document.createElement("button");
tab.className = "grouping-tab" + (field === Report.activeGrouping ? " active" : "");
tab.textContent = fieldLabels[field] || field;
tab.onclick = () => {
Report.activeGrouping = field;
tabsEl.querySelectorAll(".grouping-tab").forEach((t) => t.classList.remove("active"));
tab.classList.add("active");
Report._drawGroupTable(report, field);
};
tabsEl.appendChild(tab);
});
Report._drawGroupTable(report, Report.activeGrouping);
},
// 渲染单个分组字段的均值表。
_drawGroupTable(report, field) {
const tableEl = document.getElementById("grouping-table");
const stats = report.groupings[field] || [];
const metrics = report.metrics || [];
let head = "<tr><th>组</th><th>样本</th>";
metrics.forEach((m) => (head += `<th>${App.escape(App.shortMetric(m))}</th>`));
head += "</tr>";
let body = "";
stats.forEach((stat) => {
body += `<tr><td>${App.escape(stat.key)}</td><td>${stat.count}</td>`;
metrics.forEach((m) => {
const v = stat.means ? stat.means[m] : null;
const cls = App.scoreClass(v);
const text = v === null || v === undefined ? "—" : v.toFixed(2);
body += `<td class="${cls}">${text}</td>`;
});
body += "</tr>";
});
tableEl.innerHTML = `<table class="group-table">${head}${body}</table>`;
},
// ④ 最低分样本逐条复核表(点击展开)。
renderLowest(report) {
const wrap = document.getElementById("lowest-table");
const samples = report.lowest_samples || [];
wrap.innerHTML = "";
if (samples.length === 0) {
wrap.innerHTML = '<div class="lowest-detail-inner" style="padding:16px">暂无可复核样本。</div>';
return;
}
const metrics = report.metrics || [];
samples.forEach((sample, idx) => {
const row = document.createElement("div");
row.className = "lowest-row";
const scoreBadges = metrics
.map((m) => {
const v = sample.metrics ? sample.metrics[m] : null;
const cls = App.scoreClass(v);
const text = v === null || v === undefined ? "—" : v.toFixed(2);
return `<span class="score-badge ${cls}" title="${App.escape(m)}">${text}</span>`;
})
.join("");
row.innerHTML = `
<span class="sid">${App.escape(sample.sample_id)}</span>
<span class="q">${App.escape(sample.question || "—")}</span>
<span class="scores">${scoreBadges}</span>
`;
const detail = document.createElement("div");
detail.className = "lowest-detail";
detail.hidden = true;
detail.innerHTML = Report._detailHtml(sample);
row.addEventListener("click", () => {
detail.hidden = !detail.hidden;
});
wrap.appendChild(row);
wrap.appendChild(detail);
});
},
// 单条样本的展开详情question / contexts / answer / ground_truth。
_detailHtml(sample) {
const contexts = (sample.contexts || [])
.map((c, i) => `<div class="ctx-item">[${i + 1}] ${App.escape(c)}</div>`)
.join("");
const errorBlock = sample.error
? `<div class="detail-field"><div class="detail-label">错误 error</div><div style="color:#dc2626">${App.escape(sample.error)}</div></div>`
: "";
return `
<div class="lowest-detail-inner">
<div class="detail-field">
<div class="detail-label">问题 question</div>
<div>${App.escape(sample.question || "—")}</div>
</div>
<div class="detail-field">
<div class="detail-label">检索片段 contexts</div>
<div class="detail-context">${contexts || "(空)"}</div>
</div>
<div class="detail-field">
<div class="detail-label">生成答案 answer</div>
<div>${App.escape(sample.answer || "—")}</div>
</div>
<div class="detail-field">
<div class="detail-label">标准答案 ground_truth</div>
<div class="detail-gt">${App.escape(sample.ground_truth || "—")}</div>
</div>
${errorBlock}
</div>
`;
},
};

133
webapp/static/js/runner.js Normal file
View File

@@ -0,0 +1,133 @@
// runner.js — 新建评估视图:列出场景、触发评估、轮询任务状态与日志。
const Runner = {
selectedScenario: null,
pollTimer: null,
// 绑定运行按钮。
init() {
document.getElementById("run-btn").addEventListener("click", () => Runner.trigger());
document.getElementById("view-report-btn").addEventListener("click", () => {
if (Runner.lastRunId) {
App.currentRunId = Runner.lastRunId;
App.enableReportNav();
App.switchView("report");
}
});
},
// 加载并渲染可触发的场景列表。
async loadScenarios() {
const list = document.getElementById("scenario-list");
list.innerHTML = '<p class="muted">加载中…</p>';
try {
const data = await API.scenarios();
const scenarios = data.scenarios || [];
if (scenarios.length === 0) {
list.innerHTML = '<p class="muted">未在 scenarios/ 下找到场景文件。</p>';
return;
}
list.innerHTML = "";
scenarios.forEach((sc) => list.appendChild(Runner.renderScenarioItem(sc)));
} catch (err) {
list.innerHTML = `<p class="muted">加载失败:${App.escape(err.message)}</p>`;
}
},
// 构造单个场景条目。
renderScenarioItem(sc) {
const item = document.createElement("div");
const invalid = !!sc.error;
item.className = "scenario-item" + (invalid ? " invalid" : "");
const modeTag = sc.mode
? `<span class="tag mode-${App.escape(sc.mode)}">${App.escape(sc.mode)}</span>`
: "";
const metricCount = (sc.metrics || []).length;
item.innerHTML = `
<div>
<div class="scenario-name">${App.escape(sc.scenario_name || sc.path)}</div>
<div class="scenario-path">${App.escape(sc.path)}</div>
${sc.error ? `<div class="scenario-path" style="color:#dc2626">${App.escape(sc.error)}</div>` : ""}
</div>
<div class="scenario-tags">
${modeTag}
<span class="tag">${metricCount} 指标</span>
</div>
`;
if (!invalid) {
item.addEventListener("click", () => {
document.querySelectorAll(".scenario-item").forEach((el) => el.classList.remove("selected"));
item.classList.add("selected");
Runner.selectedScenario = sc.path;
document.getElementById("selected-scenario").textContent = sc.path;
document.getElementById("run-btn").disabled = false;
});
}
return item;
},
// 触发评估并开始轮询。
async trigger() {
if (!Runner.selectedScenario) return;
const runBtn = document.getElementById("run-btn");
runBtn.disabled = true;
const panel = document.getElementById("task-panel");
const logBox = document.getElementById("task-log");
const statusBadge = document.getElementById("task-status");
const reportBtn = document.getElementById("view-report-btn");
panel.hidden = false;
reportBtn.hidden = true;
logBox.textContent = "";
Runner._setStatus(statusBadge, "queued");
try {
const resp = await API.triggerEvaluation(Runner.selectedScenario);
Runner.poll(resp.task_id);
} catch (err) {
Runner._setStatus(statusBadge, "failed");
logBox.textContent = `触发失败:${err.message}`;
runBtn.disabled = false;
}
},
// 周期性轮询任务状态,刷新日志与徽标。
poll(taskId) {
const logBox = document.getElementById("task-log");
const statusBadge = document.getElementById("task-status");
const reportBtn = document.getElementById("view-report-btn");
const runBtn = document.getElementById("run-btn");
if (Runner.pollTimer) clearInterval(Runner.pollTimer);
Runner.pollTimer = setInterval(async () => {
try {
const status = await API.taskStatus(taskId);
logBox.textContent = (status.logs || []).join("\n");
logBox.scrollTop = logBox.scrollHeight;
Runner._setStatus(statusBadge, status.status);
if (status.status === "completed" || status.status === "failed") {
clearInterval(Runner.pollTimer);
runBtn.disabled = false;
if (status.status === "completed" && status.run_id) {
Runner.lastRunId = status.run_id;
reportBtn.hidden = false;
}
}
} catch (err) {
clearInterval(Runner.pollTimer);
logBox.textContent += `\n轮询失败:${err.message}`;
runBtn.disabled = false;
}
}, 1200);
},
// 更新状态徽标的文本与配色类。
_setStatus(badge, status) {
badge.textContent = status;
badge.className = "badge " + status;
},
};