Add RAGAS evaluation web console (FastAPI + vanilla JS)
- webapp/: FastAPI backend with runs/scenarios/evaluations API routers; services for run_reader, report_builder, scenario_scanner, task_manager (lazy ragas import — server boots even without ragas); Pydantic models - webapp/static/: single-page console (layout A: left-nav + main area); report detail with metric cards, Chart.js distribution histogram, grouping table, lowest-score sample review; trigger evaluation + log polling - webmain.py: uvicorn entry point (alongside existing main.py CLI) - start.bat: Windows one-click launcher with env checks and auto-browser open - rag_eval/datasets/: implement missing loader + normalizer modules (load_dataset_records, normalize_records) required by evaluator - scripts/seed_sample_run.py: generate realistic demo run artifacts - .gitignore: exclude datasets/ data files but keep rag_eval/datasets/ source Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>
This commit is contained in:
267
webapp/static/css/app.css
Normal file
267
webapp/static/css/app.css
Normal file
@@ -0,0 +1,267 @@
|
||||
/* Siemens RAGAS 评估控制台 — 样式表
|
||||
配色取自西门子品牌色(petrol / 深青)与中性灰,呼应企业语境。 */
|
||||
|
||||
:root {
|
||||
--petrol: #009999;
|
||||
--petrol-dark: #007a7a;
|
||||
--ink: #0f1b2d;
|
||||
--ink-soft: #1a2942;
|
||||
--slate: #64748b;
|
||||
--slate-light: #94a3b8;
|
||||
--line: #e2e8f0;
|
||||
--bg: #f4f6f9;
|
||||
--surface: #ffffff;
|
||||
--good: #16a34a;
|
||||
--warn: #eab308;
|
||||
--bad: #dc2626;
|
||||
--shadow: 0 1px 3px rgba(15, 27, 45, 0.08), 0 1px 2px rgba(15, 27, 45, 0.04);
|
||||
--radius: 10px;
|
||||
font-synthesis: none;
|
||||
}
|
||||
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
|
||||
body {
|
||||
font-family: "Segoe UI", "Microsoft YaHei", system-ui, -apple-system, sans-serif;
|
||||
background: var(--bg);
|
||||
color: var(--ink);
|
||||
font-size: 14px;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.app { display: flex; min-height: 100vh; }
|
||||
|
||||
/* ---------- 左侧导航 ---------- */
|
||||
.sidebar {
|
||||
width: 208px;
|
||||
flex-shrink: 0;
|
||||
background: linear-gradient(180deg, var(--ink) 0%, var(--ink-soft) 100%);
|
||||
color: #cbd5e1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
padding: 20px 14px;
|
||||
position: sticky;
|
||||
top: 0;
|
||||
height: 100vh;
|
||||
}
|
||||
|
||||
.brand { padding: 0 8px 22px; }
|
||||
.brand-mark {
|
||||
font-size: 20px; font-weight: 700; letter-spacing: 1px; color: #fff;
|
||||
}
|
||||
.brand-sub { font-size: 12px; color: var(--petrol); margin-top: 2px; letter-spacing: 2px; }
|
||||
|
||||
.nav { display: flex; flex-direction: column; gap: 4px; flex: 1; }
|
||||
.nav-item {
|
||||
display: flex; align-items: center; gap: 10px;
|
||||
background: transparent; border: none; color: #cbd5e1;
|
||||
padding: 10px 12px; border-radius: 8px; cursor: pointer;
|
||||
font-size: 14px; text-align: left; width: 100%;
|
||||
transition: background 0.15s, color 0.15s;
|
||||
}
|
||||
/* Interaction states of a sidebar navigation button. */
.nav-item:hover { background: rgba(255, 255, 255, 0.06); color: #fff; }
.nav-item.active { background: var(--petrol); color: #fff; }
.nav-item:disabled { opacity: 0.4; cursor: not-allowed; }
/* :hover still matches a disabled button, so cancel the hover highlight there;
   otherwise a locked entry looks clickable. */
.nav-item:disabled:hover { background: transparent; color: #cbd5e1; }
.nav-ico { width: 18px; text-align: center; color: var(--petrol); font-weight: 700; }
/* The icon turns white on the petrol background of the active item
   (this rule used to be declared twice; once is enough). */
.nav-item.active .nav-ico { color: #fff; }
|
||||
|
||||
.sidebar-foot {
|
||||
display: flex; align-items: center; gap: 8px;
|
||||
font-size: 12px; color: var(--slate-light);
|
||||
padding: 12px 8px 0; border-top: 1px solid rgba(255, 255, 255, 0.08);
|
||||
}
|
||||
.dot { width: 8px; height: 8px; border-radius: 50%; background: var(--slate-light); }
|
||||
.dot.ok { background: var(--good); }
|
||||
.dot.bad { background: var(--bad); }
|
||||
|
||||
/* ---------- 主内容区 ---------- */
|
||||
.main { flex: 1; display: flex; flex-direction: column; min-width: 0; }
|
||||
|
||||
.topbar {
|
||||
display: flex; align-items: center; justify-content: space-between;
|
||||
padding: 18px 28px; background: var(--surface); border-bottom: 1px solid var(--line);
|
||||
position: sticky; top: 0; z-index: 5;
|
||||
}
|
||||
.topbar h1 { font-size: 18px; font-weight: 600; }
|
||||
|
||||
.view { padding: 24px 28px; }
|
||||
|
||||
/* ---------- 按钮 ---------- */
|
||||
.btn {
|
||||
border: 1px solid var(--line); background: var(--surface); color: var(--ink);
|
||||
padding: 8px 16px; border-radius: 8px; cursor: pointer; font-size: 13px;
|
||||
transition: all 0.15s; font-family: inherit;
|
||||
}
|
||||
.btn:hover { border-color: var(--petrol); color: var(--petrol); }
|
||||
.btn-primary { background: var(--petrol); border-color: var(--petrol); color: #fff; }
|
||||
.btn-primary:hover { background: var(--petrol-dark); border-color: var(--petrol-dark); color: #fff; }
|
||||
.btn-primary:disabled { background: var(--slate-light); border-color: var(--slate-light); cursor: not-allowed; }
|
||||
.btn-ghost { background: transparent; }
|
||||
|
||||
/* ---------- 运行列表 ---------- */
|
||||
.runs-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(320px, 1fr)); gap: 16px; }
|
||||
.run-card {
|
||||
background: var(--surface); border: 1px solid var(--line); border-radius: var(--radius);
|
||||
padding: 16px; cursor: pointer; transition: all 0.15s; box-shadow: var(--shadow);
|
||||
}
|
||||
.run-card:hover { border-color: var(--petrol); transform: translateY(-1px); }
|
||||
.run-card-head { display: flex; justify-content: space-between; align-items: flex-start; gap: 10px; }
|
||||
.run-card-title { font-size: 15px; font-weight: 600; word-break: break-all; }
|
||||
.run-card-meta { font-size: 12px; color: var(--slate); margin-top: 6px; line-height: 1.7; }
|
||||
.run-card-metrics { display: flex; flex-wrap: wrap; gap: 8px; margin-top: 12px; }
|
||||
/* Compact "metric name + mean" chip shown on a run card. */
.metric-chip {
  font-size: 12px; padding: 3px 8px; border-radius: 6px; background: var(--bg);
  border: 1px solid var(--line);
}
.metric-chip b { font-variant-numeric: tabular-nums; }
/* The run-card renderer tags the value with good / warn / bad / na;
   colour it the same way as the large metric cards. */
.metric-chip b.good { color: var(--good); }
.metric-chip b.warn { color: var(--warn); }
.metric-chip b.bad { color: var(--bad); }
.metric-chip b.na { color: var(--slate-light); }
|
||||
|
||||
/* ---------- 通用面板 ---------- */
|
||||
.panel {
|
||||
background: var(--surface); border: 1px solid var(--line); border-radius: var(--radius);
|
||||
padding: 20px; box-shadow: var(--shadow); margin-bottom: 18px;
|
||||
}
|
||||
.panel h2 { font-size: 16px; margin-bottom: 6px; }
|
||||
.panel-head { display: flex; align-items: center; justify-content: space-between; margin-bottom: 12px; }
|
||||
|
||||
.muted { color: var(--slate); }
|
||||
.tiny { font-size: 11px; margin-top: 8px; }
|
||||
.tight { margin: 0 !important; }
|
||||
code {
|
||||
background: var(--bg); border: 1px solid var(--line); border-radius: 4px;
|
||||
padding: 1px 6px; font-size: 12px; font-family: "Cascadia Code", Consolas, monospace;
|
||||
}
|
||||
|
||||
/* ---------- 新建评估 ---------- */
|
||||
.scenario-list { display: flex; flex-direction: column; gap: 8px; margin: 16px 0; }
|
||||
.scenario-item {
|
||||
display: flex; align-items: center; justify-content: space-between; gap: 12px;
|
||||
border: 1px solid var(--line); border-radius: 8px; padding: 12px 14px; cursor: pointer;
|
||||
transition: all 0.15s;
|
||||
}
|
||||
.scenario-item:hover { border-color: var(--petrol); background: #f0fbfb; }
|
||||
.scenario-item.selected { border-color: var(--petrol); background: #e6f7f7; box-shadow: inset 0 0 0 1px var(--petrol); }
|
||||
.scenario-item.invalid { opacity: 0.55; cursor: not-allowed; }
|
||||
.scenario-name { font-weight: 600; font-size: 14px; }
|
||||
.scenario-path { font-size: 12px; color: var(--slate); font-family: monospace; }
|
||||
.scenario-tags { display: flex; gap: 6px; align-items: center; flex-shrink: 0; }
|
||||
.tag {
|
||||
font-size: 11px; padding: 2px 8px; border-radius: 999px; background: var(--bg);
|
||||
border: 1px solid var(--line); color: var(--slate);
|
||||
}
|
||||
.tag.mode-online { background: #eff6ff; color: #1d4ed8; border-color: #bfdbfe; }
|
||||
.tag.mode-offline { background: #f0fdf4; color: #15803d; border-color: #bbf7d0; }
|
||||
|
||||
.run-actions { display: flex; align-items: center; gap: 14px; }
|
||||
.selected-scenario { font-size: 13px; }
|
||||
|
||||
/* ---------- 任务进度 ---------- */
|
||||
.task-head { display: flex; align-items: center; gap: 12px; margin-bottom: 12px; }
|
||||
.badge {
|
||||
font-size: 12px; padding: 3px 10px; border-radius: 999px; font-weight: 600;
|
||||
background: var(--bg); color: var(--slate); border: 1px solid var(--line);
|
||||
}
|
||||
.badge.queued { background: #f1f5f9; color: var(--slate); }
|
||||
.badge.running { background: #fef9c3; color: #854d0e; border-color: #fde68a; }
|
||||
.badge.completed { background: #dcfce7; color: #166534; border-color: #bbf7d0; }
|
||||
.badge.failed { background: #fee2e2; color: #991b1b; border-color: #fecaca; }
|
||||
.log-box {
|
||||
background: #0b1220; color: #cbd5e1; border-radius: 8px; padding: 14px;
|
||||
font-family: "Cascadia Code", Consolas, monospace; font-size: 12px; line-height: 1.7;
|
||||
max-height: 320px; overflow-y: auto; white-space: pre-wrap; word-break: break-word;
|
||||
}
|
||||
.task-actions { margin-top: 12px; }
|
||||
|
||||
/* ---------- 报告详情 ---------- */
|
||||
.report-meta {
|
||||
background: var(--surface); border: 1px solid var(--line); border-radius: var(--radius);
|
||||
padding: 14px 18px; display: flex; justify-content: space-between; align-items: center;
|
||||
flex-wrap: wrap; gap: 10px; box-shadow: var(--shadow); margin-bottom: 18px;
|
||||
}
|
||||
.report-meta-title { font-size: 15px; font-weight: 600; }
|
||||
.report-meta-info { font-size: 12px; color: var(--slate); }
|
||||
.status-pill { font-size: 12px; font-weight: 600; }
|
||||
.status-pill.completed { color: var(--good); }
|
||||
|
||||
.section-label {
|
||||
font-size: 12px; font-weight: 600; letter-spacing: 0.5px; color: var(--slate);
|
||||
text-transform: uppercase; margin: 18px 0 10px;
|
||||
}
|
||||
|
||||
.metric-cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 12px; }
|
||||
.metric-card {
|
||||
background: var(--surface); border: 1px solid var(--line); border-radius: var(--radius);
|
||||
padding: 16px; text-align: center; box-shadow: var(--shadow);
|
||||
}
|
||||
.metric-value { font-size: 28px; font-weight: 700; font-variant-numeric: tabular-nums; }
|
||||
.metric-value.good { color: var(--good); }
|
||||
.metric-value.warn { color: var(--warn); }
|
||||
.metric-value.bad { color: var(--bad); }
|
||||
.metric-value.na { color: var(--slate-light); }
|
||||
.metric-name { font-size: 12px; color: var(--slate); margin-top: 4px; }
|
||||
|
||||
.report-row { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; }
|
||||
.report-half { margin-bottom: 0; }
|
||||
|
||||
.select {
|
||||
border: 1px solid var(--line); border-radius: 6px; padding: 5px 10px; font-size: 12px;
|
||||
background: var(--surface); color: var(--ink); font-family: inherit; cursor: pointer;
|
||||
}
|
||||
|
||||
.grouping-tabs { display: flex; gap: 6px; margin-bottom: 10px; flex-wrap: wrap; }
|
||||
.grouping-tab {
|
||||
font-size: 12px; padding: 4px 10px; border-radius: 6px; border: 1px solid var(--line);
|
||||
background: var(--surface); cursor: pointer; color: var(--slate);
|
||||
}
|
||||
.grouping-tab.active { background: var(--petrol); color: #fff; border-color: var(--petrol); }
|
||||
|
||||
/* Per-group metric means table. */
table.group-table { width: 100%; border-collapse: collapse; font-size: 12px; }
table.group-table th, table.group-table td { padding: 6px 8px; text-align: left; }
table.group-table th { color: var(--slate); border-bottom: 1px solid var(--line); font-weight: 600; }
table.group-table td { border-bottom: 1px solid #f1f5f9; font-variant-numeric: tabular-nums; }
/* Score cells carry a good / warn / bad / na class set by the table renderer. */
table.group-table td.good { color: var(--good); }
table.group-table td.warn { color: var(--warn); }
table.group-table td.bad { color: var(--bad); }
table.group-table td.na { color: var(--slate-light); }
|
||||
|
||||
/* 最低分样本表 */
|
||||
.lowest-table {
|
||||
background: var(--surface); border: 1px solid var(--line); border-radius: var(--radius);
|
||||
overflow: hidden; box-shadow: var(--shadow);
|
||||
}
|
||||
.lowest-row {
|
||||
display: grid; grid-template-columns: 90px 1fr auto; gap: 12px; align-items: center;
|
||||
padding: 11px 16px; border-bottom: 1px solid #f1f5f9; cursor: pointer; transition: background 0.12s;
|
||||
}
|
||||
.lowest-row:hover { background: var(--bg); }
|
||||
.lowest-row .sid { font-size: 12px; color: var(--slate); font-family: monospace; }
|
||||
.lowest-row .q { font-size: 13px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
||||
.lowest-row .scores { display: flex; gap: 8px; }
|
||||
.score-badge {
|
||||
font-size: 12px; padding: 2px 8px; border-radius: 6px; font-variant-numeric: tabular-nums;
|
||||
font-weight: 600;
|
||||
}
|
||||
.score-badge.good { background: #dcfce7; color: #166534; }
|
||||
.score-badge.warn { background: #fef9c3; color: #854d0e; }
|
||||
.score-badge.bad { background: #fee2e2; color: #991b1b; }
|
||||
.score-badge.na { background: var(--bg); color: var(--slate-light); }
|
||||
|
||||
.lowest-detail { padding: 0 16px; background: #fcfdfe; border-bottom: 1px solid #f1f5f9; }
|
||||
.lowest-detail-inner { padding: 14px 0; font-size: 13px; line-height: 1.7; }
|
||||
.detail-field { margin-bottom: 10px; }
|
||||
.detail-label { font-size: 12px; color: var(--slate); font-weight: 600; margin-bottom: 3px; }
|
||||
.detail-context { color: #475569; font-size: 12px; }
|
||||
.detail-context .ctx-item {
|
||||
padding: 4px 0; border-bottom: 1px dashed var(--line);
|
||||
}
|
||||
.detail-gt { color: var(--good); }
|
||||
|
||||
.empty { text-align: center; padding: 60px 20px; color: var(--slate); }
|
||||
.empty p { margin-bottom: 8px; }
|
||||
|
||||
.spinner { display: inline-block; width: 14px; height: 14px; border: 2px solid var(--line);
|
||||
border-top-color: var(--petrol); border-radius: 50%; animation: spin 0.7s linear infinite;
|
||||
vertical-align: middle; }
|
||||
@keyframes spin { to { transform: rotate(360deg); } }
|
||||
|
||||
@media (max-width: 880px) {
|
||||
.report-row { grid-template-columns: 1fr; }
|
||||
.sidebar { width: 64px; }
|
||||
.brand-sub, .nav-item span:not(.nav-ico), .sidebar-foot span:last-child { display: none; }
|
||||
}
|
||||
118
webapp/static/index.html
Normal file
118
webapp/static/index.html
Normal file
@@ -0,0 +1,118 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Siemens RAGAS 评估控制台</title>
|
||||
<link rel="stylesheet" href="/static/css/app.css" />
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<div class="app">
|
||||
<!-- 左侧导航(布局 A) -->
|
||||
<aside class="sidebar">
|
||||
<div class="brand">
|
||||
<div class="brand-mark">RAGAS</div>
|
||||
<div class="brand-sub">评估控制台</div>
|
||||
</div>
|
||||
<nav class="nav">
|
||||
<button class="nav-item" data-view="runs">
|
||||
<span class="nav-ico">▢</span><span>运行列表</span>
|
||||
</button>
|
||||
<button class="nav-item" data-view="new">
|
||||
<span class="nav-ico">+</span><span>新建评估</span>
|
||||
</button>
|
||||
<button class="nav-item" data-view="report" data-requires-run="1">
|
||||
<span class="nav-ico">▤</span><span>报告详情</span>
|
||||
</button>
|
||||
</nav>
|
||||
<div class="sidebar-foot">
|
||||
<span class="dot" id="health-dot"></span>
|
||||
<span id="health-text">连接中…</span>
|
||||
</div>
|
||||
</aside>
|
||||
|
||||
<!-- 主内容区 -->
|
||||
<main class="main">
|
||||
<header class="topbar">
|
||||
<h1 id="view-title">运行列表</h1>
|
||||
<button class="btn btn-ghost" id="refresh-btn">刷新</button>
|
||||
</header>
|
||||
|
||||
<!-- 运行列表视图 -->
|
||||
<section class="view" id="view-runs">
|
||||
<div id="runs-container" class="runs-grid"></div>
|
||||
<div class="empty" id="runs-empty" hidden>
|
||||
<p>暂无评估运行。</p>
|
||||
<p class="muted">从「新建评估」触发一次,或运行示例数据生成脚本:<code>python scripts/seed_sample_run.py</code></p>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- 新建评估视图 -->
|
||||
<section class="view" id="view-new" hidden>
|
||||
<div class="panel">
|
||||
<h2>选择场景并运行</h2>
|
||||
<p class="muted">从 <code>scenarios/</code> 下选择一个场景配置,点击运行后在下方查看实时状态与日志。</p>
|
||||
<div class="scenario-list" id="scenario-list"></div>
|
||||
<div class="run-actions">
|
||||
<button class="btn btn-primary" id="run-btn" disabled>运行评估</button>
|
||||
<span class="selected-scenario muted" id="selected-scenario">未选择场景</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="panel" id="task-panel" hidden>
|
||||
<div class="task-head">
|
||||
<h2>评估进度</h2>
|
||||
<span class="badge" id="task-status">queued</span>
|
||||
</div>
|
||||
<pre class="log-box" id="task-log"></pre>
|
||||
<div class="task-actions">
|
||||
<button class="btn btn-primary" id="view-report-btn" hidden>查看报告</button>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- 报告详情视图 -->
|
||||
<section class="view" id="view-report" hidden>
|
||||
<div class="empty" id="report-empty">
|
||||
<p>请先从「运行列表」选择一次运行。</p>
|
||||
</div>
|
||||
<div id="report-content" hidden>
|
||||
<!-- 顶部元信息条 -->
|
||||
<div class="report-meta" id="report-meta"></div>
|
||||
|
||||
<!-- ① 指标均值卡片 -->
|
||||
<div class="section-label">① 指标均值 OVERVIEW</div>
|
||||
<div class="metric-cards" id="metric-cards"></div>
|
||||
|
||||
<!-- ② 分布 + ③ 分组 并排 -->
|
||||
<div class="report-row">
|
||||
<div class="panel report-half">
|
||||
<div class="panel-head">
|
||||
<div class="section-label tight">② 分数分布</div>
|
||||
<select id="dist-metric-select" class="select"></select>
|
||||
</div>
|
||||
<canvas id="dist-chart" height="160"></canvas>
|
||||
<p class="muted tiny">暴露长尾失败样本</p>
|
||||
</div>
|
||||
<div class="panel report-half">
|
||||
<div class="section-label tight">③ 分组均值</div>
|
||||
<div id="grouping-tabs" class="grouping-tabs"></div>
|
||||
<div id="grouping-table"></div>
|
||||
<p class="muted tiny">定位薄弱类别</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ④ 最低分样本逐条复核 -->
|
||||
<div class="section-label">④ 最低分样本(点击展开逐条复核)</div>
|
||||
<div class="lowest-table" id="lowest-table"></div>
|
||||
</div>
|
||||
</section>
|
||||
</main>
|
||||
</div>
|
||||
|
||||
<script src="/static/js/api.js"></script>
|
||||
<script src="/static/js/report.js"></script>
|
||||
<script src="/static/js/runner.js"></script>
|
||||
<script src="/static/js/app.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
46
webapp/static/js/api.js
Normal file
46
webapp/static/js/api.js
Normal file
@@ -0,0 +1,46 @@
|
||||
// api.js — 控制台后端 HTTP 接口的轻量封装。
|
||||
|
||||
/**
 * Thin wrapper around the console backend's HTTP endpoints.
 *
 * Every method resolves with the parsed JSON body. On a non-2xx response it
 * rejects with an Error whose `message` is the backend's `detail` (or a
 * generic fallback) and whose `status` property is the HTTP status code.
 */
const API = {
  /**
   * Issue a GET request.
   * @param {string} path - Request URL.
   * @returns {Promise<any>} Parsed JSON body.
   */
  async get(path) {
    const resp = await fetch(path);
    return API._unwrap(resp);
  },

  /**
   * Issue a POST request with a JSON body.
   * @param {string} path - Request URL.
   * @param {object} [body] - Payload; a falsy value is sent as `{}`.
   * @returns {Promise<any>} Parsed JSON body.
   */
  async post(path, body) {
    const resp = await fetch(path, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(body || {}),
    });
    return API._unwrap(resp);
  },

  /**
   * Shared response handling for get/post.
   * @param {Response} resp
   * @returns {Promise<any>} Parsed JSON body of a successful response.
   * @throws {Error} With `status` set when `resp.ok` is false.
   */
  async _unwrap(resp) {
    if (!resp.ok) {
      const err = new Error(await API._extractError(resp));
      err.status = resp.status;
      throw err;
    }
    return resp.json();
  },

  /**
   * Best-effort extraction of a readable message from an error response.
   * Falls back to a generic message when the body is not JSON or carries
   * no usable `detail`.
   * @param {Response} resp
   * @returns {Promise<string>}
   */
  async _extractError(resp) {
    const fallback = `请求失败 (${resp.status})`;
    try {
      const data = await resp.json();
      return API._describeDetail(data.detail) || fallback;
    } catch (_e) {
      // Body was empty / not JSON / not an object: the status code is all we have.
      return fallback;
    }
  },

  /**
   * Flatten a `detail` payload into one line of text.
   * Strings pass through; arrays are joined with "; "; objects that look like
   * validation entries ({ loc, msg }) become "loc.path: msg"; any other
   * object is JSON-encoded so it never degrades to "[object Object]".
   * @param {*} detail
   * @returns {string} Empty string when there is nothing to report.
   */
  _describeDetail(detail) {
    if (detail == null) return "";
    if (typeof detail === "string") return detail;
    if (Array.isArray(detail)) {
      return detail
        .map((item) => API._describeDetail(item))
        .filter(Boolean)
        .join("; ");
    }
    if (typeof detail === "object") {
      if (typeof detail.msg === "string") {
        const loc = Array.isArray(detail.loc) ? detail.loc.join(".") : "";
        return loc ? `${loc}: ${detail.msg}` : detail.msg;
      }
      return JSON.stringify(detail);
    }
    return String(detail);
  },

  health() { return API.get("/api/health"); },
  runs() { return API.get("/api/runs"); },
  runDetail(runId) { return API.get(`/api/runs/${encodeURIComponent(runId)}`); },
  scenarios() { return API.get("/api/scenarios"); },
  triggerEvaluation(scenarioPath) {
    return API.post("/api/evaluations", { scenario_path: scenarioPath });
  },
  taskStatus(taskId) { return API.get(`/api/evaluations/${encodeURIComponent(taskId)}`); },
};
|
||||
152
webapp/static/js/app.js
Normal file
152
webapp/static/js/app.js
Normal file
@@ -0,0 +1,152 @@
|
||||
// app.js — 视图路由、运行列表渲染、健康检查。整个控制台的入口编排。
|
||||
|
||||
/**
 * Console entry point: view routing, run-list rendering, health indicator,
 * plus small formatting helpers shared with the other scripts.
 */
const App = {
  // run_id of the run currently selected for the report view (null = none).
  currentRunId: null,
  // Name of the view shown last; used by the refresh button.
  activeView: null,
  views: ["runs", "new", "report"],
  titles: { runs: "运行列表", new: "新建评估", report: "报告详情" },

  // Characters replaced by escape(); quotes are included so the result is
  // also safe inside a quoted attribute value.
  _htmlEscapes: { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" },
  // Counter used by loadRuns() to recognise and drop out-of-date responses.
  _runsRequest: 0,

  /** Wire up navigation, show the first view and start the health poll. */
  init() {
    document.querySelectorAll(".nav-item").forEach((btn) => {
      btn.addEventListener("click", () => App.switchView(btn.dataset.view));
    });
    document.getElementById("refresh-btn").addEventListener("click", () => App.refreshCurrent());

    // Entries marked data-requires-run stay locked until a run is selected;
    // enableReportNav() unlocks them.
    document.querySelectorAll(".nav-item[data-requires-run]").forEach((btn) => {
      btn.disabled = !App.currentRunId;
    });

    Runner.init();
    App.switchView("runs");
    App.checkHealth();
    setInterval(App.checkHealth, 15000);
  },

  /**
   * Show one main view, sync nav highlight and title, and load its data.
   * @param {string} view - One of App.views.
   */
  switchView(view) {
    // Whichever code path selected the run, make sure the nav entry for the
    // report is usable once we are actually showing one.
    if (view === "report" && App.currentRunId) App.enableReportNav();

    App.views.forEach((name) => {
      const el = document.getElementById(`view-${name}`);
      if (el) el.hidden = name !== view;
    });
    document.querySelectorAll(".nav-item").forEach((btn) => {
      btn.classList.toggle("active", btn.dataset.view === view);
    });
    document.getElementById("view-title").textContent = App.titles[view] || view;
    App.activeView = view;

    if (view === "runs") App.loadRuns();
    if (view === "new") Runner.loadScenarios();
    // Report.render shows its own placeholder when no run is selected.
    if (view === "report") Report.render(App.currentRunId);
  },

  /** Reload the data of the view that is currently shown. */
  refreshCurrent() {
    App.switchView(App.activeView || "runs");
  },

  /** Fetch the run list and render one card per run. */
  async loadRuns() {
    const container = document.getElementById("runs-container");
    const empty = document.getElementById("runs-empty");
    App._runsRequest += 1;
    const ticket = App._runsRequest;

    // Reset both areas so a previous "no runs" placeholder cannot sit next to
    // the loading text or an error message.
    empty.hidden = true;
    container.innerHTML = '<p class="muted">加载中…</p>';
    try {
      const data = await API.runs();
      if (ticket !== App._runsRequest) return; // a newer request superseded this one
      const runs = data.runs || [];
      container.innerHTML = "";
      if (runs.length === 0) {
        empty.hidden = false;
        return;
      }
      runs.forEach((run) => container.appendChild(App.renderRunCard(run)));
    } catch (err) {
      if (ticket !== App._runsRequest) return;
      container.innerHTML = `<p class="muted">加载失败:${App.escape(err.message)}</p>`;
    }
  },

  /**
   * Build the card element for one run summary. Clicking the card selects
   * the run and opens its report.
   * @param {object} run - Entry of the `runs` array returned by API.runs().
   * @returns {HTMLDivElement}
   */
  renderRunCard(run) {
    const card = document.createElement("div");
    card.className = "run-card";
    card.addEventListener("click", () => {
      App.currentRunId = run.run_id;
      App.enableReportNav();
      App.switchView("report");
    });

    const means = run.metric_means || {};
    const chips = (run.metrics || [])
      .map((m) => {
        const val = means[m];
        const cls = App.scoreClass(val);
        const text = App.formatScore(val, "n/a");
        return `<span class="metric-chip">${App.escape(App.shortMetric(m))} <b class="${cls}">${text}</b></span>`;
      })
      .join("");

    // Counts come from the backend too: escape them and show a dash when absent.
    const valid = App.escape(run.valid_samples == null ? "—" : run.valid_samples);
    const invalid = App.escape(run.invalid_samples == null ? "—" : run.invalid_samples);

    card.innerHTML = `
      <div class="run-card-head">
        <div class="run-card-title">${App.escape(run.scenario_name || run.run_id)}</div>
      </div>
      <div class="run-card-meta">
        <div>${App.escape(run.mode || "—")} · judge: ${App.escape(run.judge_model || "—")}</div>
        <div>${valid} 有效 / ${invalid} 无效 · ${App.escape(App.shortTime(run.finished_at))}</div>
      </div>
      <div class="run-card-metrics">${chips}</div>
    `;
    return card;
  },

  /** Unlock the "report" navigation entry (called once a run is selected). */
  enableReportNav() {
    const btn = document.querySelector('.nav-item[data-view="report"]');
    if (btn) btn.disabled = false;
  },

  /**
   * Map a score to its colour class.
   * @param {*} value
   * @returns {"good"|"warn"|"bad"|"na"} "na" for anything that is not a finite number.
   */
  scoreClass(value) {
    if (typeof value !== "number" || !Number.isFinite(value)) return "na";
    if (value >= 0.8) return "good";
    if (value >= 0.65) return "warn";
    return "bad";
  },

  /**
   * Format a score with two decimals.
   * @param {*} value
   * @param {string} [placeholder="n/a"] - Returned when value is not a finite number.
   * @returns {string}
   */
  formatScore(value, placeholder) {
    if (typeof value !== "number" || !Number.isFinite(value)) {
      return placeholder === undefined ? "n/a" : placeholder;
    }
    return value.toFixed(2);
  },

  /**
   * Abbreviate a metric name to save horizontal space; unknown names are
   * returned unchanged.
   * @param {string} name
   * @returns {string}
   */
  shortMetric(name) {
    const map = {
      faithfulness: "faith.",
      answer_relevancy: "ans.rel.",
      context_recall: "ctx.recall",
      context_precision: "ctx.prec.",
    };
    // Own-property check: a plain lookup would also hit inherited members
    // such as "toString" or "constructor".
    return Object.prototype.hasOwnProperty.call(map, name) ? map[name] : name;
  },

  /**
   * Cut an ISO-like timestamp down to "YYYY-MM-DD HH:MM".
   * @param {string} iso
   * @returns {string} "—" when the value is empty.
   */
  shortTime(iso) {
    if (!iso) return "—";
    return String(iso).replace("T", " ").slice(0, 16);
  },

  /**
   * Escape text for insertion into HTML, in element content or inside a
   * quoted attribute value.
   * @param {*} text - null/undefined become "".
   * @returns {string}
   */
  escape(text) {
    if (text == null) return "";
    return String(text).replace(/[&<>"']/g, (ch) => App._htmlEscapes[ch]);
  },

  /** Ping the backend and update the status dot in the sidebar footer. */
  async checkHealth() {
    const dot = document.getElementById("health-dot");
    const label = document.getElementById("health-text");
    try {
      await API.health();
      dot.className = "dot ok";
      label.textContent = "服务正常";
    } catch (_e) {
      // Any failure (network or non-2xx) is reported as "offline".
      dot.className = "dot bad";
      label.textContent = "服务离线";
    }
  },
};
|
||||
|
||||
document.addEventListener("DOMContentLoaded", App.init);
|
||||
258
webapp/static/js/report.js
Normal file
258
webapp/static/js/report.js
Normal file
@@ -0,0 +1,258 @@
|
||||
// report.js — 报告详情页渲染:元信息、指标卡片、分布图、分组表、低分样本复核。
|
||||
|
||||
const Report = {
|
||||
distChart: null,
|
||||
currentDetail: null,
|
||||
activeGrouping: null,
|
||||
|
||||
// 加载并渲染指定运行的完整报告。
|
||||
async render(runId) {
|
||||
const empty = document.getElementById("report-empty");
|
||||
const content = document.getElementById("report-content");
|
||||
if (!runId) {
|
||||
empty.hidden = false;
|
||||
content.hidden = true;
|
||||
return;
|
||||
}
|
||||
empty.hidden = true;
|
||||
content.hidden = false;
|
||||
content.style.opacity = "0.4";
|
||||
|
||||
try {
|
||||
const detail = await API.runDetail(runId);
|
||||
Report.currentDetail = detail;
|
||||
Report.renderMeta(detail.summary);
|
||||
Report.renderMetricCards(detail.summary, detail.report);
|
||||
Report.renderDistribution(detail.report);
|
||||
Report.renderGroupings(detail.report);
|
||||
Report.renderLowest(detail.report);
|
||||
content.style.opacity = "1";
|
||||
} catch (err) {
|
||||
empty.hidden = false;
|
||||
content.hidden = true;
|
||||
empty.innerHTML = `<p>加载报告失败:${App.escape(err.message)}</p>`;
|
||||
}
|
||||
},
|
||||
|
||||
// 顶部元信息条。
|
||||
renderMeta(summary) {
|
||||
const el = document.getElementById("report-meta");
|
||||
el.innerHTML = `
|
||||
<div>
|
||||
<div class="report-meta-title">${App.escape(summary.scenario_name || summary.run_id)}
|
||||
<span class="status-pill completed">● completed</span></div>
|
||||
<div class="report-meta-info">run_id: ${App.escape(summary.run_id)}</div>
|
||||
</div>
|
||||
<div class="report-meta-info">
|
||||
${App.escape(summary.mode || "—")} · judge: ${App.escape(summary.judge_model || "—")}
|
||||
· ${summary.total_samples} 样本 (${summary.valid_samples} 有效 / ${summary.invalid_samples} 无效)
|
||||
· ${App.escape(App.shortTime(summary.finished_at))}
|
||||
</div>
|
||||
`;
|
||||
},
|
||||
|
||||
// ① 指标均值卡片。
|
||||
renderMetricCards(summary, report) {
|
||||
const wrap = document.getElementById("metric-cards");
|
||||
wrap.innerHTML = "";
|
||||
const metrics = report.metrics && report.metrics.length ? report.metrics : summary.metrics;
|
||||
metrics.forEach((metric) => {
|
||||
const value = report.metric_means ? report.metric_means[metric] : null;
|
||||
const cls = App.scoreClass(value);
|
||||
const text = value === null || value === undefined ? "n/a" : value.toFixed(2);
|
||||
const card = document.createElement("div");
|
||||
card.className = "metric-card";
|
||||
card.innerHTML = `
|
||||
<div class="metric-value ${cls}">${text}</div>
|
||||
<div class="metric-name">${App.escape(metric)}</div>
|
||||
`;
|
||||
wrap.appendChild(card);
|
||||
});
|
||||
},
|
||||
|
||||
// ② 分数分布直方图(可切换指标)。
|
||||
renderDistribution(report) {
|
||||
const select = document.getElementById("dist-metric-select");
|
||||
const distributions = report.distributions || {};
|
||||
const metricsWithDist = Object.keys(distributions);
|
||||
|
||||
select.innerHTML = "";
|
||||
if (metricsWithDist.length === 0) {
|
||||
Report._drawDistChart([], []);
|
||||
return;
|
||||
}
|
||||
metricsWithDist.forEach((metric) => {
|
||||
const opt = document.createElement("option");
|
||||
opt.value = metric;
|
||||
opt.textContent = metric;
|
||||
select.appendChild(opt);
|
||||
});
|
||||
select.onchange = () => Report._updateDistChart(select.value);
|
||||
Report._updateDistChart(metricsWithDist[0]);
|
||||
},
|
||||
|
||||
// 用选定指标的分箱数据刷新直方图。
|
||||
_updateDistChart(metric) {
|
||||
const distributions = Report.currentDetail.report.distributions || {};
|
||||
const bins = distributions[metric] || [];
|
||||
const labels = bins.map((b) => b.label);
|
||||
const counts = bins.map((b) => b.count);
|
||||
const colors = bins.map((b) => Report._binColor(b.lower));
|
||||
Report._drawDistChart(labels, counts, colors);
|
||||
},
|
||||
|
||||
// 低分箱偏红、高分箱偏绿,直观暴露长尾。
|
||||
_binColor(lower) {
|
||||
if (lower >= 0.8) return "#16a34a";
|
||||
if (lower >= 0.6) return "#84cc16";
|
||||
if (lower >= 0.4) return "#eab308";
|
||||
if (lower >= 0.2) return "#f97316";
|
||||
return "#dc2626";
|
||||
},
|
||||
|
||||
// 实际绘制 Chart.js 柱状图。
|
||||
_drawDistChart(labels, counts, colors) {
|
||||
const canvas = document.getElementById("dist-chart");
|
||||
if (Report.distChart) Report.distChart.destroy();
|
||||
Report.distChart = new Chart(canvas, {
|
||||
type: "bar",
|
||||
data: {
|
||||
labels,
|
||||
datasets: [{ data: counts, backgroundColor: colors || "#009999", borderRadius: 4 }],
|
||||
},
|
||||
options: {
|
||||
responsive: true,
|
||||
plugins: { legend: { display: false } },
|
||||
scales: {
|
||||
y: { beginAtZero: true, ticks: { precision: 0 }, grid: { color: "#f1f5f9" } },
|
||||
x: { grid: { display: false } },
|
||||
},
|
||||
},
|
||||
});
|
||||
},
|
||||
|
||||
// ③ 分组均值(difficulty / question_type / language)。
|
||||
renderGroupings(report) {
|
||||
const tabsEl = document.getElementById("grouping-tabs");
|
||||
const tableEl = document.getElementById("grouping-table");
|
||||
const groupings = report.groupings || {};
|
||||
const fields = Object.keys(groupings);
|
||||
|
||||
tabsEl.innerHTML = "";
|
||||
if (fields.length === 0) {
|
||||
tableEl.innerHTML = '<p class="muted tiny">数据集未包含可分组字段(difficulty / question_type)。</p>';
|
||||
return;
|
||||
}
|
||||
|
||||
const fieldLabels = { difficulty: "难度", question_type: "类型", language: "语言" };
|
||||
Report.activeGrouping = fields[0];
|
||||
fields.forEach((field) => {
|
||||
const tab = document.createElement("button");
|
||||
tab.className = "grouping-tab" + (field === Report.activeGrouping ? " active" : "");
|
||||
tab.textContent = fieldLabels[field] || field;
|
||||
tab.onclick = () => {
|
||||
Report.activeGrouping = field;
|
||||
tabsEl.querySelectorAll(".grouping-tab").forEach((t) => t.classList.remove("active"));
|
||||
tab.classList.add("active");
|
||||
Report._drawGroupTable(report, field);
|
||||
};
|
||||
tabsEl.appendChild(tab);
|
||||
});
|
||||
Report._drawGroupTable(report, Report.activeGrouping);
|
||||
},
|
||||
|
||||
// 渲染单个分组字段的均值表。
|
||||
_drawGroupTable(report, field) {
|
||||
const tableEl = document.getElementById("grouping-table");
|
||||
const stats = report.groupings[field] || [];
|
||||
const metrics = report.metrics || [];
|
||||
|
||||
let head = "<tr><th>组</th><th>样本</th>";
|
||||
metrics.forEach((m) => (head += `<th>${App.escape(App.shortMetric(m))}</th>`));
|
||||
head += "</tr>";
|
||||
|
||||
let body = "";
|
||||
stats.forEach((stat) => {
|
||||
body += `<tr><td>${App.escape(stat.key)}</td><td>${stat.count}</td>`;
|
||||
metrics.forEach((m) => {
|
||||
const v = stat.means ? stat.means[m] : null;
|
||||
const cls = App.scoreClass(v);
|
||||
const text = v === null || v === undefined ? "—" : v.toFixed(2);
|
||||
body += `<td class="${cls}">${text}</td>`;
|
||||
});
|
||||
body += "</tr>";
|
||||
});
|
||||
tableEl.innerHTML = `<table class="group-table">${head}${body}</table>`;
|
||||
},
|
||||
|
||||
// ④ 最低分样本逐条复核表(点击展开)。
|
||||
renderLowest(report) {
|
||||
const wrap = document.getElementById("lowest-table");
|
||||
const samples = report.lowest_samples || [];
|
||||
wrap.innerHTML = "";
|
||||
if (samples.length === 0) {
|
||||
wrap.innerHTML = '<div class="lowest-detail-inner" style="padding:16px">暂无可复核样本。</div>';
|
||||
return;
|
||||
}
|
||||
const metrics = report.metrics || [];
|
||||
samples.forEach((sample, idx) => {
|
||||
const row = document.createElement("div");
|
||||
row.className = "lowest-row";
|
||||
const scoreBadges = metrics
|
||||
.map((m) => {
|
||||
const v = sample.metrics ? sample.metrics[m] : null;
|
||||
const cls = App.scoreClass(v);
|
||||
const text = v === null || v === undefined ? "—" : v.toFixed(2);
|
||||
return `<span class="score-badge ${cls}" title="${App.escape(m)}">${text}</span>`;
|
||||
})
|
||||
.join("");
|
||||
row.innerHTML = `
|
||||
<span class="sid">${App.escape(sample.sample_id)}</span>
|
||||
<span class="q">${App.escape(sample.question || "—")}</span>
|
||||
<span class="scores">${scoreBadges}</span>
|
||||
`;
|
||||
|
||||
const detail = document.createElement("div");
|
||||
detail.className = "lowest-detail";
|
||||
detail.hidden = true;
|
||||
detail.innerHTML = Report._detailHtml(sample);
|
||||
|
||||
row.addEventListener("click", () => {
|
||||
detail.hidden = !detail.hidden;
|
||||
});
|
||||
wrap.appendChild(row);
|
||||
wrap.appendChild(detail);
|
||||
});
|
||||
},
|
||||
|
||||
// 单条样本的展开详情:question / contexts / answer / ground_truth。
|
||||
_detailHtml(sample) {
|
||||
const contexts = (sample.contexts || [])
|
||||
.map((c, i) => `<div class="ctx-item">[${i + 1}] ${App.escape(c)}</div>`)
|
||||
.join("");
|
||||
const errorBlock = sample.error
|
||||
? `<div class="detail-field"><div class="detail-label">错误 error</div><div style="color:#dc2626">${App.escape(sample.error)}</div></div>`
|
||||
: "";
|
||||
return `
|
||||
<div class="lowest-detail-inner">
|
||||
<div class="detail-field">
|
||||
<div class="detail-label">问题 question</div>
|
||||
<div>${App.escape(sample.question || "—")}</div>
|
||||
</div>
|
||||
<div class="detail-field">
|
||||
<div class="detail-label">检索片段 contexts</div>
|
||||
<div class="detail-context">${contexts || "(空)"}</div>
|
||||
</div>
|
||||
<div class="detail-field">
|
||||
<div class="detail-label">生成答案 answer</div>
|
||||
<div>${App.escape(sample.answer || "—")}</div>
|
||||
</div>
|
||||
<div class="detail-field">
|
||||
<div class="detail-label">标准答案 ground_truth</div>
|
||||
<div class="detail-gt">${App.escape(sample.ground_truth || "—")}</div>
|
||||
</div>
|
||||
${errorBlock}
|
||||
</div>
|
||||
`;
|
||||
},
|
||||
};
|
||||
133
webapp/static/js/runner.js
Normal file
133
webapp/static/js/runner.js
Normal file
@@ -0,0 +1,133 @@
|
||||
// runner.js — 新建评估视图:列出场景、触发评估、轮询任务状态与日志。
|
||||
|
||||
/**
 * Controller for the "new evaluation" view: lists scenarios, triggers an
 * evaluation for the selected one, then polls the task for status and logs.
 * Relies on the globals `API` and `App` and on fixed element ids in the page.
 */
const Runner = {
  // Path of the scenario currently selected in the list; null until one is clicked.
  selectedScenario: null,
  // Handle of the active polling interval, or null when nothing is being polled.
  pollTimer: null,
  // run_id of the most recently completed task; read by the "view report" button.
  lastRunId: null,
  // True from trigger()/poll() until the task ends or polling fails.
  isRunning: false,

  /** Wire up the run button and the "view report" button. */
  init() {
    document.getElementById("run-btn").addEventListener("click", () => Runner.trigger());
    document.getElementById("view-report-btn").addEventListener("click", () => {
      if (Runner.lastRunId) {
        App.currentRunId = Runner.lastRunId;
        App.enableReportNav();
        App.switchView("report");
      }
    });
  },

  /**
   * Fetch the scenario list via `API.scenarios()` and render it into
   * `#scenario-list`; shows a placeholder when empty and the error message
   * when the request fails.
   */
  async loadScenarios() {
    const list = document.getElementById("scenario-list");
    list.innerHTML = '<p class="muted">加载中…</p>';
    try {
      const data = await API.scenarios();
      const scenarios = data.scenarios || [];
      if (scenarios.length === 0) {
        list.innerHTML = '<p class="muted">未在 scenarios/ 下找到场景文件。</p>';
        return;
      }
      list.innerHTML = "";
      scenarios.forEach((sc) => list.appendChild(Runner.renderScenarioItem(sc)));
    } catch (err) {
      list.innerHTML = `<p class="muted">加载失败：${App.escape(err.message)}</p>`;
    }
  },

  /**
   * Build the list entry for one scenario.
   *
   * @param {object} sc - Scenario descriptor; reads `path`, `scenario_name`,
   *   `mode`, `metrics` and `error`. A truthy `error` marks the entry
   *   invalid and leaves it without a click handler.
   * @returns {HTMLElement} The constructed `.scenario-item` element.
   */
  renderScenarioItem(sc) {
    const item = document.createElement("div");
    const invalid = !!sc.error;
    item.className = "scenario-item" + (invalid ? " invalid" : "");

    const modeTag = sc.mode
      ? `<span class="tag mode-${App.escape(sc.mode)}">${App.escape(sc.mode)}</span>`
      : "";
    const metricCount = (sc.metrics || []).length;
    const errorLine = sc.error
      ? `<div class="scenario-path" style="color:#dc2626">${App.escape(sc.error)}</div>`
      : "";

    item.innerHTML = `
      <div>
        <div class="scenario-name">${App.escape(sc.scenario_name || sc.path)}</div>
        <div class="scenario-path">${App.escape(sc.path)}</div>
        ${errorLine}
      </div>
      <div class="scenario-tags">
        ${modeTag}
        <span class="tag">${metricCount} 指标</span>
      </div>
    `;

    if (!invalid) {
      item.addEventListener("click", () => {
        document.querySelectorAll(".scenario-item").forEach((el) => el.classList.remove("selected"));
        item.classList.add("selected");
        Runner.selectedScenario = sc.path;
        document.getElementById("selected-scenario").textContent = sc.path;
        // Changing the selection must not re-enable the button mid-run,
        // otherwise a second evaluation could be started and the first
        // task's polling would be dropped.
        document.getElementById("run-btn").disabled = Runner.isRunning;
      });
    }
    return item;
  },

  /**
   * Start an evaluation for the selected scenario and begin polling it.
   * Does nothing when no scenario is selected or a run is already active.
   */
  async trigger() {
    if (!Runner.selectedScenario || Runner.isRunning) return;
    Runner.isRunning = true;
    const runBtn = document.getElementById("run-btn");
    runBtn.disabled = true;

    const panel = document.getElementById("task-panel");
    const logBox = document.getElementById("task-log");
    const statusBadge = document.getElementById("task-status");
    const reportBtn = document.getElementById("view-report-btn");
    panel.hidden = false;
    reportBtn.hidden = true;
    logBox.textContent = "";
    Runner._setStatus(statusBadge, "queued");

    try {
      const resp = await API.triggerEvaluation(Runner.selectedScenario);
      if (!resp || !resp.task_id) {
        // Without a task id there is nothing to poll; report it as a trigger failure.
        throw new Error("服务器未返回 task_id");
      }
      Runner.poll(resp.task_id);
    } catch (err) {
      Runner._setStatus(statusBadge, "failed");
      logBox.textContent = `触发失败：${err.message}`;
      Runner.isRunning = false;
      runBtn.disabled = false;
    }
  },

  /**
   * Poll `API.taskStatus(taskId)` every 1200 ms, mirroring logs and status
   * into the task panel until the task is "completed" or "failed", or a
   * request throws. Any previously running poll is cancelled first.
   *
   * @param {string} taskId - Identifier returned by the trigger call.
   */
  poll(taskId) {
    const logBox = document.getElementById("task-log");
    const statusBadge = document.getElementById("task-status");
    const reportBtn = document.getElementById("view-report-btn");

    if (Runner.pollTimer) clearInterval(Runner.pollTimer);
    Runner.isRunning = true;

    // Prevents ticks from stacking up when a request outlasts the interval.
    let inFlight = false;
    const timer = setInterval(async () => {
      if (inFlight) return;
      inFlight = true;
      try {
        const status = await API.taskStatus(taskId);
        // Ignore responses that arrive after this poll was stopped or replaced.
        if (Runner.pollTimer !== timer) return;
        logBox.textContent = (status.logs || []).join("\n");
        logBox.scrollTop = logBox.scrollHeight;
        Runner._setStatus(statusBadge, status.status);

        if (status.status === "completed" || status.status === "failed") {
          Runner._stopPolling();
          if (status.status === "completed" && status.run_id) {
            Runner.lastRunId = status.run_id;
            reportBtn.hidden = false;
          }
        }
      } catch (err) {
        if (Runner.pollTimer !== timer) return;
        Runner._stopPolling();
        logBox.textContent += `\n轮询失败：${err.message}`;
      } finally {
        inFlight = false;
      }
    }, 1200);
    Runner.pollTimer = timer;
  },

  /** Cancel the active poll, clear the running state and re-enable the run button. */
  _stopPolling() {
    if (Runner.pollTimer) clearInterval(Runner.pollTimer);
    Runner.pollTimer = null;
    Runner.isRunning = false;
    document.getElementById("run-btn").disabled = false;
  },

  /**
   * Set a status badge's text and CSS classes.
   *
   * @param {HTMLElement} badge - Badge element to update.
   * @param {string} status - Status text; also appended as a class after "badge".
   */
  _setStatus(badge, status) {
    badge.textContent = status;
    badge.className = "badge " + status;
  },
};
|
||||
Reference in New Issue
Block a user