<!--
  Covers: overview, architecture, modules, data flows (4 flows), RAGAS metrics (7), API reference, weight config, deployment, tech stack, directory structure.
  Self-contained HTML with Siemens teal theme, sidebar scrollspy, responsive layout.
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
  File: HTML, 1102 lines, 41 KiB
-->
<!DOCTYPE html>
|
||
<html lang="zh-CN">
|
||
<head>
|
||
<meta charset="UTF-8" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||
<title>Siemens RAGAS 项目总览</title>
|
||
<style>
|
||
:root {
|
||
--siemens-teal: #009999;
|
||
--siemens-blue: #0066CC;
|
||
--bg: #f8fafc;
|
||
--surface: #ffffff;
|
||
--surface-soft: #eef6f8;
|
||
--text: #0f172a;
|
||
--muted: #475569;
|
||
--border: #dbe4ee;
|
||
--code-bg: #1e293b;
|
||
--code-text: #e2e8f0;
|
||
--shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
|
||
--radius: 18px;
|
||
--sidebar-width: 260px;
|
||
--content-max: 1360px;
|
||
}
|
||
|
||
* { box-sizing: border-box; }
|
||
/* Smooth scrolling for in-page anchor jumps, enabled only when the user has
   not asked the system to reduce motion. */
@media (prefers-reduced-motion: no-preference) {
  html { scroll-behavior: smooth; }
}
|
||
body {
|
||
margin: 0;
|
||
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
||
color: var(--text);
|
||
background: linear-gradient(180deg, #f8fafc 0%, #eff7fb 100%);
|
||
}
|
||
|
||
a { color: inherit; text-decoration: none; }
|
||
code, pre { font-family: "Cascadia Code", "SFMono-Regular", Consolas, monospace; }
|
||
|
||
.layout {
|
||
display: flex;
|
||
min-height: 100vh;
|
||
}
|
||
|
||
.sidebar {
|
||
position: fixed;
|
||
inset: 0 auto 0 0;
|
||
width: var(--sidebar-width);
|
||
background: linear-gradient(180deg, #062630 0%, #083947 42%, #0a4a65 100%);
|
||
color: #e6fffb;
|
||
padding: 26px 18px 24px;
|
||
overflow-y: auto;
|
||
box-shadow: 4px 0 18px rgba(0, 0, 0, 0.12);
|
||
z-index: 20;
|
||
}
|
||
|
||
.brand {
|
||
margin-bottom: 22px;
|
||
padding-bottom: 18px;
|
||
border-bottom: 1px solid rgba(255, 255, 255, 0.14);
|
||
}
|
||
|
||
.brand h1 {
|
||
margin: 0;
|
||
font-size: 1.22rem;
|
||
line-height: 1.35;
|
||
}
|
||
|
||
.brand p {
|
||
margin: 10px 0 0;
|
||
font-size: 0.92rem;
|
||
color: rgba(230, 255, 251, 0.78);
|
||
}
|
||
|
||
.sidebar nav {
|
||
display: grid;
|
||
gap: 8px;
|
||
}
|
||
|
||
.sidebar nav a {
|
||
display: block;
|
||
padding: 10px 12px;
|
||
border-radius: 12px;
|
||
color: rgba(255, 255, 255, 0.82);
|
||
font-size: 0.95rem;
|
||
transition: 0.2s ease;
|
||
border: 1px solid transparent;
|
||
}
|
||
|
||
.sidebar nav a:hover,
|
||
.sidebar nav a.active {
|
||
background: rgba(255, 255, 255, 0.12);
|
||
color: #fff;
|
||
border-color: rgba(255, 255, 255, 0.18);
|
||
transform: translateX(2px);
|
||
}
|
||
|
||
.sidebar .meta {
|
||
margin-top: 22px;
|
||
padding-top: 16px;
|
||
border-top: 1px solid rgba(255, 255, 255, 0.14);
|
||
font-size: 0.84rem;
|
||
color: rgba(230, 255, 251, 0.72);
|
||
line-height: 1.7;
|
||
}
|
||
|
||
.main {
|
||
margin-left: var(--sidebar-width);
|
||
width: calc(100% - var(--sidebar-width));
|
||
padding: 28px;
|
||
}
|
||
|
||
.container {
|
||
max-width: var(--content-max);
|
||
margin: 0 auto;
|
||
}
|
||
|
||
.hero {
|
||
position: relative;
|
||
overflow: hidden;
|
||
background: radial-gradient(circle at top right, rgba(0, 102, 204, 0.22), transparent 34%),
|
||
linear-gradient(135deg, #ffffff 0%, #effafb 55%, #e6f2ff 100%);
|
||
border: 1px solid rgba(0, 153, 153, 0.15);
|
||
border-radius: 26px;
|
||
padding: 34px;
|
||
box-shadow: var(--shadow);
|
||
margin-bottom: 28px;
|
||
}
|
||
|
||
.hero::after {
|
||
content: "";
|
||
position: absolute;
|
||
right: -60px;
|
||
top: -60px;
|
||
width: 220px;
|
||
height: 220px;
|
||
border-radius: 50%;
|
||
background: radial-gradient(circle, rgba(0, 153, 153, 0.18), transparent 70%);
|
||
pointer-events: none;
|
||
}
|
||
|
||
.eyebrow {
|
||
display: inline-flex;
|
||
align-items: center;
|
||
gap: 8px;
|
||
padding: 6px 12px;
|
||
border-radius: 999px;
|
||
background: rgba(0, 153, 153, 0.08);
|
||
color: var(--siemens-teal);
|
||
font-weight: 700;
|
||
font-size: 0.86rem;
|
||
margin-bottom: 16px;
|
||
}
|
||
|
||
.hero h2 {
|
||
margin: 0;
|
||
font-size: clamp(2rem, 3vw, 3.1rem);
|
||
line-height: 1.12;
|
||
letter-spacing: -0.03em;
|
||
max-width: 900px;
|
||
}
|
||
|
||
.hero p {
|
||
max-width: 900px;
|
||
font-size: 1.02rem;
|
||
line-height: 1.8;
|
||
color: var(--muted);
|
||
margin: 16px 0 0;
|
||
}
|
||
|
||
.hero-grid {
|
||
display: grid;
|
||
grid-template-columns: 1.5fr 1fr;
|
||
gap: 24px;
|
||
align-items: end;
|
||
margin-top: 24px;
|
||
}
|
||
|
||
.hero-stats {
|
||
display: grid;
|
||
grid-template-columns: repeat(4, minmax(0, 1fr));
|
||
gap: 14px;
|
||
}
|
||
|
||
.stat {
|
||
background: rgba(255, 255, 255, 0.8);
|
||
border: 1px solid rgba(0, 102, 204, 0.1);
|
||
border-radius: 18px;
|
||
padding: 16px;
|
||
box-shadow: var(--shadow);
|
||
}
|
||
|
||
.stat b {
|
||
display: block;
|
||
font-size: 1.55rem;
|
||
color: var(--siemens-blue);
|
||
margin-bottom: 6px;
|
||
}
|
||
|
||
.stat span {
|
||
color: var(--muted);
|
||
font-size: 0.92rem;
|
||
}
|
||
|
||
.hero-note {
|
||
background: linear-gradient(135deg, rgba(0, 153, 153, 0.12), rgba(0, 102, 204, 0.1));
|
||
border: 1px solid rgba(0, 153, 153, 0.14);
|
||
border-radius: 20px;
|
||
padding: 20px;
|
||
line-height: 1.8;
|
||
color: #103451;
|
||
}
|
||
|
||
.hero-note b { color: var(--siemens-blue); }
|
||
|
||
section {
|
||
background: var(--surface);
|
||
border: 1px solid var(--border);
|
||
border-radius: 22px;
|
||
padding: 28px;
|
||
box-shadow: var(--shadow);
|
||
margin-bottom: 24px;
|
||
scroll-margin-top: 18px;
|
||
}
|
||
|
||
.section-title {
|
||
display: flex;
|
||
align-items: center;
|
||
gap: 14px;
|
||
margin: 0 0 18px;
|
||
font-size: 1.5rem;
|
||
letter-spacing: -0.02em;
|
||
}
|
||
|
||
.section-title::before {
|
||
content: "";
|
||
width: 6px;
|
||
height: 28px;
|
||
border-radius: 999px;
|
||
background: linear-gradient(180deg, var(--siemens-teal), var(--siemens-blue));
|
||
flex: 0 0 auto;
|
||
}
|
||
|
||
.section-intro {
|
||
margin: 0 0 18px;
|
||
color: var(--muted);
|
||
line-height: 1.85;
|
||
}
|
||
|
||
.badges,
|
||
.stack-badges {
|
||
display: flex;
|
||
flex-wrap: wrap;
|
||
gap: 10px;
|
||
}
|
||
|
||
.badge {
|
||
display: inline-flex;
|
||
align-items: center;
|
||
gap: 8px;
|
||
padding: 9px 14px;
|
||
border-radius: 999px;
|
||
background: linear-gradient(135deg, rgba(0, 153, 153, 0.12), rgba(0, 102, 204, 0.1));
|
||
color: #0f4060;
|
||
border: 1px solid rgba(0, 153, 153, 0.15);
|
||
font-size: 0.93rem;
|
||
font-weight: 600;
|
||
}
|
||
|
||
.grid-2,
|
||
.grid-3,
|
||
.grid-4 {
|
||
display: grid;
|
||
gap: 18px;
|
||
}
|
||
|
||
.grid-2 { grid-template-columns: repeat(2, minmax(0, 1fr)); }
|
||
.grid-3 { grid-template-columns: repeat(3, minmax(0, 1fr)); }
|
||
.grid-4 { grid-template-columns: repeat(4, minmax(0, 1fr)); }
|
||
|
||
.card {
|
||
background: linear-gradient(180deg, #ffffff 0%, #fbfdff 100%);
|
||
border: 1px solid var(--border);
|
||
border-radius: 18px;
|
||
padding: 18px;
|
||
box-shadow: var(--shadow);
|
||
}
|
||
|
||
.card h3,
|
||
.card h4 {
|
||
margin: 0 0 12px;
|
||
font-size: 1.05rem;
|
||
}
|
||
|
||
.card p,
|
||
.card li {
|
||
color: var(--muted);
|
||
line-height: 1.8;
|
||
margin: 0;
|
||
}
|
||
|
||
.card ul {
|
||
margin: 0;
|
||
padding-left: 20px;
|
||
}
|
||
|
||
.card .mini {
|
||
font-size: 0.86rem;
|
||
color: #64748b;
|
||
margin-top: 8px;
|
||
}
|
||
|
||
.diagram,
|
||
.code-block,
|
||
.tree {
|
||
background: var(--code-bg);
|
||
color: var(--code-text);
|
||
padding: 20px;
|
||
border-radius: 18px;
|
||
overflow-x: auto;
|
||
border: 1px solid rgba(148, 163, 184, 0.16);
|
||
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.04);
|
||
}
|
||
|
||
.diagram { font-size: 0.95rem; line-height: 1.5; }
|
||
.code-block { line-height: 1.7; }
|
||
.tree { line-height: 1.65; }
|
||
|
||
.pill-heading {
|
||
display: inline-flex;
|
||
align-items: center;
|
||
gap: 8px;
|
||
padding: 6px 12px;
|
||
border-radius: 999px;
|
||
background: rgba(0, 102, 204, 0.08);
|
||
color: var(--siemens-blue);
|
||
font-weight: 700;
|
||
margin: 14px 0 10px;
|
||
font-size: 0.9rem;
|
||
}
|
||
|
||
/* Data tables: full width with rounded outer corners.
   border-radius is not reliably applied to a table whose borders are
   collapsed, so the separated border model with zero spacing is used
   instead; overflow: hidden clips the header background to the radius. */
table {
  width: 100%;
  border-collapse: separate;
  border-spacing: 0;
  overflow: hidden;
  border-radius: 16px;
  border: 1px solid var(--border);
}
|
||
|
||
th,
|
||
td {
|
||
padding: 14px 14px;
|
||
text-align: left;
|
||
vertical-align: top;
|
||
border-bottom: 1px solid var(--border);
|
||
line-height: 1.7;
|
||
font-size: 0.95rem;
|
||
}
|
||
|
||
th {
|
||
background: linear-gradient(180deg, #edf8f8, #e8f1ff);
|
||
color: #103451;
|
||
font-size: 0.92rem;
|
||
}
|
||
|
||
tr:last-child td { border-bottom: none; }
|
||
|
||
.flow-grid {
|
||
display: grid;
|
||
gap: 16px;
|
||
}
|
||
|
||
.flow-card {
|
||
border: 1px solid var(--border);
|
||
border-radius: 18px;
|
||
padding: 18px;
|
||
background: linear-gradient(180deg, #ffffff 0%, #fbfdff 100%);
|
||
}
|
||
|
||
.flow-card h3 {
|
||
margin: 0 0 12px;
|
||
font-size: 1.06rem;
|
||
}
|
||
|
||
.flow-card pre {
|
||
margin: 0;
|
||
white-space: pre-wrap;
|
||
word-break: break-word;
|
||
line-height: 1.85;
|
||
}
|
||
|
||
.metric-card {
|
||
border: 1px solid var(--border);
|
||
border-radius: 18px;
|
||
padding: 18px;
|
||
background: linear-gradient(180deg, #ffffff 0%, #fafdff 100%);
|
||
box-shadow: var(--shadow);
|
||
}
|
||
|
||
.metric-card h3 {
|
||
margin: 0 0 10px;
|
||
font-size: 1.02rem;
|
||
color: var(--siemens-blue);
|
||
}
|
||
|
||
.metric-card .need-gt {
|
||
display: inline-block;
|
||
padding: 4px 10px;
|
||
border-radius: 999px;
|
||
font-size: 0.82rem;
|
||
font-weight: 700;
|
||
margin-bottom: 12px;
|
||
}
|
||
|
||
.need-gt.yes { background: rgba(0, 102, 204, 0.1); color: var(--siemens-blue); }
|
||
.need-gt.no { background: rgba(0, 153, 153, 0.12); color: var(--siemens-teal); }
|
||
|
||
.formula {
|
||
background: linear-gradient(180deg, #f3fbfb 0%, #eef5ff 100%);
|
||
border: 1px solid rgba(0, 102, 204, 0.14);
|
||
color: #0f4060;
|
||
padding: 18px;
|
||
border-radius: 18px;
|
||
line-height: 1.9;
|
||
}
|
||
|
||
.steps {
|
||
margin: 0;
|
||
padding-left: 18px;
|
||
color: var(--muted);
|
||
line-height: 1.9;
|
||
}
|
||
|
||
.muted { color: var(--muted); }
|
||
.small { font-size: 0.9rem; }
|
||
|
||
.footer {
|
||
text-align: center;
|
||
color: #64748b;
|
||
font-size: 0.88rem;
|
||
padding: 8px 0 24px;
|
||
}
|
||
|
||
.mobile-topbar {
|
||
display: none;
|
||
position: sticky;
|
||
top: 0;
|
||
z-index: 25;
|
||
background: rgba(248, 250, 252, 0.92);
|
||
backdrop-filter: blur(10px);
|
||
border-bottom: 1px solid rgba(15, 23, 42, 0.08);
|
||
padding: 12px 16px;
|
||
margin: -28px -28px 18px;
|
||
}
|
||
|
||
.mobile-topbar button {
|
||
border: none;
|
||
background: linear-gradient(135deg, var(--siemens-teal), var(--siemens-blue));
|
||
color: #fff;
|
||
border-radius: 12px;
|
||
padding: 10px 14px;
|
||
font-weight: 700;
|
||
cursor: pointer;
|
||
}
|
||
|
||
.sidebar-backdrop {
|
||
display: none;
|
||
position: fixed;
|
||
inset: 0;
|
||
background: rgba(15, 23, 42, 0.42);
|
||
z-index: 15;
|
||
}
|
||
|
||
@media (max-width: 1180px) {
|
||
.hero-grid,
|
||
.grid-4 {
|
||
grid-template-columns: repeat(2, minmax(0, 1fr));
|
||
}
|
||
}
|
||
|
||
/* Narrow viewports: the fixed sidebar becomes an off-canvas drawer and every
   multi-column grid collapses to a single column. */
@media (max-width: 980px) {
  /* Closed drawer: slid off-screen AND hidden, so its links cannot receive
     keyboard focus or be announced while invisible. The visibility change is
     delayed until the slide-out transition has finished. */
  .sidebar {
    transform: translateX(-100%);
    visibility: hidden;
    transition: transform 0.24s ease, visibility 0s linear 0.24s;
  }

  /* Open drawer: become visible immediately, then slide in. */
  .sidebar.open {
    transform: translateX(0);
    visibility: visible;
    transition: transform 0.24s ease, visibility 0s linear 0s;
  }

  .sidebar-backdrop.show { display: block; }

  /* Content takes the full width once the sidebar no longer reserves space. */
  .main {
    margin-left: 0;
    width: 100%;
  }

  .mobile-topbar {
    display: flex;
    justify-content: space-between;
    align-items: center;
  }

  .grid-2,
  .grid-3,
  .grid-4,
  .hero-grid,
  .hero-stats {
    grid-template-columns: 1fr;
  }
}
|
||
|
||
@media (max-width: 640px) {
|
||
.main { padding: 18px; }
|
||
.hero, section { padding: 20px; }
|
||
.mobile-topbar { margin: -18px -18px 16px; }
|
||
th, td { padding: 12px 10px; }
|
||
}
|
||
</style>
|
||
</head>
|
||
<body>
|
||
<div class="sidebar-backdrop" id="sidebar-backdrop"></div>
|
||
<div class="layout">
|
||
<aside class="sidebar" id="sidebar">
|
||
<div class="brand">
|
||
<h1>Siemens RAGAS<br />项目文档</h1>
|
||
<p>西门子医疗影像 CT 知识库 RAG 评估平台</p>
|
||
</div>
|
||
<nav>
|
||
<a href="#overview">1. 项目概述</a>
|
||
<a href="#architecture">2. 系统架构</a>
|
||
<a href="#modules">3. 核心模块说明</a>
|
||
<a href="#flows">4. 数据流说明</a>
|
||
<a href="#metrics">5. RAGAS 评估指标</a>
|
||
<a href="#apis">6. API 接口文档</a>
|
||
<a href="#weights">7. 指标权重配置</a>
|
||
<a href="#deployment">8. 部署说明</a>
|
||
<a href="#stack">9. 技术栈</a>
|
||
<a href="#structure">10. 目录结构</a>
|
||
</nav>
|
||
<div class="meta">
|
||
<div><b>生成时间</b><br />2026-06-24</div>
|
||
<div style="margin-top:10px;"><b>输出文件</b><br />project-overview.html</div>
|
||
<div style="margin-top:10px;"><b>来源</b><br />README / pyproject / main.py / webmain.py / rag_eval / webapp / scenarios / .env.example</div>
|
||
</div>
|
||
</aside>
|
||
|
||
<main class="main">
|
||
<div class="mobile-topbar">
|
||
<strong>Siemens RAGAS</strong>
|
||
<button id="menu-toggle" type="button">目录</button>
|
||
</div>
|
||
|
||
<div class="container">
|
||
<header class="hero">
|
||
<div class="eyebrow">Siemens Healthineers · RAG Evaluation Platform</div>
|
||
<h2>Siemens RAGAS RAG 评估平台:面向 CT 知识库的自动化质量评估闭环</h2>
|
||
<p>
|
||
本项目将 <b>PDF 文档解析</b>、<b>题库生成</b>、<b>在线/离线 RAGAS 评测</b>、<b>报告沉淀与 Web 可视化</b>
|
||
统一进一个可复用平台。CLI 与 FastAPI Web 控制台共享同一套 <code>rag_eval</code> 核心引擎,适合批量评估、持续优化与 Dify 实时评分集成。
|
||
</p>
|
||
<div class="hero-grid">
|
||
<div>
|
||
<div class="hero-stats">
|
||
<div class="stat"><b>3</b><span>入口形态<br />CLI / Web / REST API</span></div>
|
||
<div class="stat"><b>7</b><span>RAGAS 指标<br />含 GT 依赖与非依赖</span></div>
|
||
<div class="stat"><b>4</b><span>核心流程<br />Build / Eval / Pipeline / Score</span></div>
|
||
<div class="stat"><b>2</b><span>适配模式<br />HTTP / Python Adapter</span></div>
|
||
</div>
|
||
</div>
|
||
<div class="hero-note">
|
||
<b>核心价值:</b>将 PDF 资料转成可评测题库,再以 Siemens 医疗影像场景为中心完成答题、打分、加权汇总与报告产物沉淀,形成完整质量治理闭环。
|
||
</div>
|
||
</div>
|
||
</header>
|
||
|
||
<section id="overview">
|
||
<h2 class="section-title">1. 项目概述</h2>
|
||
<p class="section-intro">
|
||
<b>项目名称:</b>Siemens RAGAS RAG 评估平台。<br />
|
||
<b>目标:</b>为西门子医疗影像 CT 知识库 RAG 系统提供自动化质量评估。<br />
|
||
<b>定位:</b>既能作为离线评测框架,也能作为在线评估控制台与 API 服务,为知识库 QA、Prompt 迭代、检索策略优化提供统一基线。
|
||
</p>
|
||
|
||
<div class="grid-2">
|
||
<div class="card">
|
||
<h3>业务闭环</h3>
|
||
<p>PDF解析 → 题库生成 → RAGAS评测 → 报告可视化 → 再迭代。项目不仅覆盖评测本身,还覆盖评测数据源建设与运行产物管理。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h3>运行方式</h3>
|
||
<p><code>main.py</code> 负责 CLI 评估与 dataset build,<code>webmain.py</code> 负责启动 FastAPI 控制台,<code>webapp.server</code> 暴露 REST API 与静态前端。</p>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="pill-heading">技术亮点</div>
|
||
<div class="badges">
|
||
<span class="badge">统一 CLI / Web / API 三入口</span>
|
||
<span class="badge">阿里云 DocMind 文档解析</span>
|
||
<span class="badge">OpenAI 兼容模型接入</span>
|
||
<span class="badge">RAGAS 0.4.3 指标流水线</span>
|
||
<span class="badge">在线 / 离线双模式评估</span>
|
||
<span class="badge">Python / HTTP Adapter 扩展机制</span>
|
||
<span class="badge">场景 YAML 驱动</span>
|
||
<span class="badge">Pipeline 后台线程编排</span>
|
||
<span class="badge">Dify 实时单题评分接口</span>
|
||
<span class="badge">metric_weights + doc_weights 加权汇总</span>
|
||
<span class="badge">历史 run 资产沉淀</span>
|
||
<span class="badge">Web 报告聚合与分布分析</span>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="architecture">
|
||
<h2 class="section-title">2. 系统架构</h2>
|
||
<p class="section-intro">
|
||
平台采用“多入口 + 单评估核心”的结构。CLI 和 Web 控制台都汇入 <code>rag_eval</code> 核心引擎;API 层只负责任务编排、配置管理与结果查询。
|
||
</p>
|
||
|
||
<pre class="diagram">┌─────────────────────────────────────────────────────────┐
│                   siemens_ragas 平台                    │
├───────────────┬────────────────────┬────────────────────┤
│  CLI 入口     │  Web 控制台        │  REST API          │
│  main.py      │  webmain.py        │  FastAPI           │
├───────────────┴────────────────────┴────────────────────┤
│                 核心评估引擎 (rag_eval)                 │
├───────────────┬────────────────────┬────────────────────┤
│  dataset_     │  execution/        │  metrics/          │
│  builder/     │  evaluator.py      │  pipeline.py       │
│  (PDF→题库)   │  (评估流程)        │  (RAGAS指标)       │
├───────────────┴────────────────────┴────────────────────┤
│                        外部依赖                         │
│ 阿里云DocMind (PDF解析) │ OpenAI兼容API (LLM/Embedding) │
└─────────────────────────────────────────────────────────┘</pre>
|
||
|
||
<div class="grid-3" style="margin-top:18px;">
|
||
<div class="card">
|
||
<h3>CLI 编排</h3>
|
||
<p><code>main.py</code> 通过互斥参数在 <code>--scenario</code> 与 <code>--dataset-build-config</code> 之间分派,分别进入评估流程与题库构建流程。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h3>Web 服务</h3>
|
||
<p><code>webmain.py</code> 负责 uvicorn 启动、日志文件轮转与 host/port 配置;<code>webapp.server</code> 注册 runs、scenarios、evaluations、pipeline、score 等 API。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h3>核心执行器</h3>
|
||
<p><code>rag_eval.execution.runner</code> 负责加载 scenario、构建模型与 adapter、调用 <code>Evaluator</code> 执行并写出标准化产物。</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="modules">
|
||
<h2 class="section-title">3. 核心模块说明</h2>
|
||
<p class="section-intro">以下模块覆盖数据准备、评估执行、Web 管理与 Siemens 业务适配的主要职责边界。</p>
|
||
|
||
<table>
|
||
<thead>
|
||
<tr>
|
||
<th>模块</th>
|
||
<th>路径</th>
|
||
<th>职责</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><b>dataset_builder</b></td>
|
||
<td><code>rag_eval/dataset_builder/</code></td>
|
||
<td>PDF解析、source chunk 归一化、LLM 题目生成、草稿题库与构建产物写出。</td>
|
||
</tr>
|
||
<tr>
|
||
<td><b>execution</b></td>
|
||
<td><code>rag_eval/execution/</code></td>
|
||
<td>评估编排、在线/离线模式切换、adapter 调用、RAGAS 打分与结果聚合。</td>
|
||
</tr>
|
||
<tr>
|
||
<td><b>metrics</b></td>
|
||
<td><code>rag_eval/metrics/</code></td>
|
||
<td>RAGAS 指标注册、模型构建、评估管道装配、指标权重与文档权重聚合。</td>
|
||
</tr>
|
||
<tr>
|
||
<td><b>reporting</b></td>
|
||
<td><code>rag_eval/reporting/</code></td>
|
||
<td>运行产物写入、summary 生成、metadata 与 scenario 快照沉淀。</td>
|
||
</tr>
|
||
<tr>
|
||
<td><b>adapters</b></td>
|
||
<td><code>rag_eval/adapters/</code></td>
|
||
<td>HTTP/Python 应用适配器封装,把外部应用结果统一为 <code>answer / contexts / raw_response</code>。</td>
|
||
</tr>
|
||
<tr>
|
||
<td><b>webapp</b></td>
|
||
<td><code>webapp/</code></td>
|
||
<td>FastAPI Web 控制台、OpenAPI 文档、任务后台管理、场景扫描、历史报告查询。</td>
|
||
</tr>
|
||
<tr>
|
||
<td><b>apps</b></td>
|
||
<td><code>apps/siemens_pdf_qa/</code></td>
|
||
<td>西门子 CT 知识库问答适配器,基于 source chunk 证据构造 Prompt 并调用 OpenAI 兼容模型生成答案。</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
|
||
<div class="grid-3" style="margin-top:18px;">
|
||
<div class="card">
|
||
<h3>settings.py</h3>
|
||
<p>集中读取 <code>.env</code>:OpenAI Key/Base URL、RAGAS Judge/Embedding 模型、并发、阿里云 DocMind、<code>SCORE_API_TOKEN</code> 等。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h3>registry.py</h3>
|
||
<p>定义 7 个受支持指标:faithfulness、answer_relevancy、context_recall、context_precision、noise_sensitivity、factual_correctness、semantic_similarity。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h3>inline_scorer.py</h3>
|
||
<p>为 <code>/api/score</code> 提供模块级缓存评分器,按 <code>(judge_model, embedding_model)</code> 复用 LLM 与 embedding 连接。</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="flows">
|
||
<h2 class="section-title">4. 数据流说明</h2>
|
||
<p class="section-intro">项目围绕四条关键流程展开:题库构建、在线评估、API 全链路 Pipeline 与 Dify 实时评分。</p>
|
||
|
||
<div class="flow-grid">
|
||
<div class="flow-card">
|
||
<h3>Flow A: 题库生成流程(Dataset Build)</h3>
|
||
<pre>PDF文件 → 阿里云DocMind解析 → 文档切片(source_chunks)
→ LLM生成题目 → CSV题库文件 → 人工审核</pre>
|
||
<p class="small muted" style="margin-top:12px;">对应入口:<code>main.py --dataset-build-config</code>;核心实现:<code>rag_eval.dataset_builder.runner</code>。</p>
|
||
</div>
|
||
|
||
<div class="flow-card">
|
||
<h3>Flow B: RAGAS评估流程(Online Evaluation)</h3>
|
||
<pre>CSV题库 → 规范化样本 → 应用适配器(siemens_pdf_qa)
→ LLM答题 → RAGAS指标计算 → 加权得分 → 报告产物</pre>
|
||
<p class="small muted" style="margin-top:12px;">对应 Siemens 场景:<code>scenarios/online/siemens-pdf-question-bank-online.yaml</code>,由 <code>apps.siemens_pdf_qa.adapter:run</code> 提供答案与证据片段。</p>
|
||
</div>
|
||
|
||
<div class="flow-card">
|
||
<h3>Flow C: 全链路 Pipeline(API触发)</h3>
|
||
<pre>POST /api/pipeline/jobs → 后台线程 → Flow A → Flow B → 产物路径</pre>
|
||
<p class="small muted" style="margin-top:12px;">由 <code>webapp.services.pipeline_task_manager</code> 在线程池中串行执行 <code>parsing_documents → generating_questions → evaluating</code> 三阶段,并返回 <code>scores.csv / summary.md / dataset.csv</code> 等路径。</p>
|
||
</div>
|
||
|
||
<div class="flow-card">
|
||
<h3>Flow D: Dify实时评分(/api/score)</h3>
|
||
<pre>Dify Agent → POST /api/score → InlineScorer → RAGAS metrics → 得分JSON</pre>
|
||
<p class="small muted" style="margin-top:12px;">当 <code>ground_truth</code> 缺失时,会自动跳过依赖参考答案的指标,并在响应中给出 <code>skipped_metrics</code>。</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="metrics">
|
||
<h2 class="section-title">5. RAGAS 评估指标</h2>
|
||
<p class="section-intro">平台当前支持 7 个指标,既覆盖回答忠实度和相关性,也覆盖对参考答案、噪声片段和语义相似度的衡量。</p>
|
||
|
||
<div class="grid-4">
|
||
<div class="metric-card">
|
||
<h3>faithfulness</h3>
|
||
<span class="need-gt no">无需 ground_truth</span>
|
||
<p>回答对检索内容的忠实度,用于防止模型脱离证据“幻觉式”作答。</p>
|
||
</div>
|
||
<div class="metric-card">
|
||
<h3>answer_relevancy</h3>
|
||
<span class="need-gt no">无需 ground_truth</span>
|
||
<p>衡量回答与问题本身的相关性,判断是否真正命中用户所问。</p>
|
||
</div>
|
||
<div class="metric-card">
|
||
<h3>context_precision</h3>
|
||
<span class="need-gt no">无需 ground_truth</span>
|
||
<p>衡量检索片段的精准度,关注上下文中与回答真正相关的比例。</p>
|
||
</div>
|
||
<div class="metric-card">
|
||
<h3>context_recall</h3>
|
||
<span class="need-gt yes">需要 ground_truth</span>
|
||
<p>检索内容对标准答案覆盖程度,反映召回是否足够支撑正确作答。</p>
|
||
</div>
|
||
<div class="metric-card">
|
||
<h3>noise_sensitivity</h3>
|
||
<span class="need-gt yes">需要 ground_truth</span>
|
||
<p>系统对噪声检索片段的鲁棒性,越不受无关片段干扰越好。</p>
|
||
</div>
|
||
<div class="metric-card">
|
||
<h3>factual_correctness</h3>
|
||
<span class="need-gt yes">需要 ground_truth</span>
|
||
<p>与标准答案对齐的事实准确性,代码中作为端到端事实正确度指标。</p>
|
||
</div>
|
||
<div class="metric-card">
|
||
<h3>semantic_similarity</h3>
|
||
<span class="need-gt yes">需要 ground_truth</span>
|
||
<p>回答与标准答案的语义相似度,依赖 embedding,不需要额外 LLM 调用。</p>
|
||
</div>
|
||
<div class="metric-card">
|
||
<h3>指标默认集合</h3>
|
||
<span class="need-gt no">在线评分默认</span>
|
||
<p><code>/api/score</code> 的默认指标集合为 faithfulness、answer_relevancy、context_recall、context_precision。</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="apis">
|
||
<h2 class="section-title">6. API 接口文档</h2>
|
||
<p class="section-intro">Web 层基于 FastAPI 提供任务提交、历史查询、LLM 配置管理与 Dify 实时评分接口。</p>
|
||
|
||
<table>
|
||
<thead>
|
||
<tr>
|
||
<th>方法</th>
|
||
<th>路径</th>
|
||
<th>说明</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr><td><b>POST</b></td><td><code>/api/pipeline/jobs</code></td><td>提交全链路评估任务,后台线程自动执行 PDF 解析、题库生成和在线评估。</td></tr>
|
||
<tr><td><b>GET</b></td><td><code>/api/pipeline/jobs/{id}</code></td><td>查询指定 Pipeline 任务状态、阶段、日志和产物路径。</td></tr>
|
||
<tr><td><b>POST</b></td><td><code>/api/score</code></td><td>Dify 实时评分接口,接收单条问答并返回各指标得分与综合得分。</td></tr>
|
||
<tr><td><b>POST</b></td><td><code>/api/llm-profiles/probe</code></td><td>临时测试 LLM / Embedding 连通性,无需先保存配置。</td></tr>
|
||
<tr><td><b>POST</b></td><td><code>/api/llm-profiles</code></td><td>创建命名的 LLM 配置档案,可供场景或控制台复用。</td></tr>
|
||
<tr><td><b>POST</b></td><td><code>/api/evaluations</code></td><td>基于已有场景 YAML 启动一次后台评估任务。</td></tr>
|
||
<tr><td><b>GET</b></td><td><code>/api/runs</code></td><td>获取历史运行列表,用于控制台报告页与明细页渲染。</td></tr>
|
||
</tbody>
|
||
</table>
|
||
|
||
<div class="grid-2" style="margin-top:18px;">
|
||
<div class="card">
|
||
<h3>/api/score 请求示例</h3>
|
||
<!-- Sample /api/score request. The token placeholder is entity-escaped so it
     is displayed as text instead of being parsed as an unknown element. -->
<pre class="code-block">POST /api/score
Authorization: Bearer &lt;token&gt;
Content-Type: application/json

{
  "question": "双源CT的时间分辨率是多少?",
  "answer": "双源CT的单扇区时间分辨率为75ms。",
  "contexts": "双源CT采用两套管-探测器系统 |||| 单扇区采集旋转135度",
  "ground_truth": "双源CT单扇区时间分辨率为75ms,需旋转135度。",
  "context_separator": " |||| ",
  "metrics": [
    "faithfulness",
    "answer_relevancy",
    "context_recall",
    "context_precision"
  ],
  "judge_model": "gpt-5",
  "embedding_model": "text-embedding-3-small"
}</pre>
|
||
</div>
|
||
<div class="card">
|
||
<h3>/api/score 响应示例</h3>
|
||
<!-- Sample /api/score response; weighted_score here equals the plain mean of
     the four metric scores shown. -->
<pre class="code-block">{
  "scores": {
    "faithfulness": 0.875,
    "answer_relevancy": 0.92,
    "context_recall": 0.81,
    "context_precision": 0.85
  },
  "weighted_score": 0.8638,
  "latency_ms": 3420,
  "skipped_metrics": [],
  "error": null
}</pre>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="grid-3" style="margin-top:18px;">
|
||
<div class="card">
|
||
<h3>Pipeline API</h3>
|
||
<p>返回 <code>job_id</code> 后即可轮询。阶段枚举包括 <code>queued</code>、<code>running</code>、<code>parsing_documents</code>、<code>generating_questions</code>、<code>evaluating</code> 与 <code>done</code>。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h3>LLM Profiles</h3>
|
||
<p>支持保存 base URL、API Key、model、timeout,并通过 <code>/apply</code> 将配置写回场景 YAML,同时可补充 metric/doc 权重。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h3>Runs API</h3>
|
||
<p>历史 run 会读取 <code>summary.md</code>、<code>scores.csv</code> 与 <code>scenario.snapshot.yaml</code>,聚合指标均值、分布和最低分样本。</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="weights">
|
||
<h2 class="section-title">7. 指标权重配置</h2>
|
||
<p class="section-intro">平台支持两级权重:<code>metric_weights</code> 控制不同指标的重要性,<code>doc_weights</code> 控制不同文档对总体分数的影响。</p>
|
||
|
||
<!-- Sample scenario YAML: nested keys are indented so the snippet is valid
     YAML when copied; the four metric weights sum to 1.0. -->
<pre class="code-block">metric_weights:
  faithfulness: 0.35
  context_recall: 0.25
  context_precision: 0.20
  answer_relevancy: 0.20

doc_weights:
  "322_双源CT.pdf": 2.0</pre>
|
||
|
||
<div class="formula" style="margin-top:18px;">
|
||
<b>weighted_score 计算逻辑:</b><br />
|
||
1. <b>单样本综合分</b> = Σ(有效指标分 × 指标权重) / Σ(有效指标权重)<br />
|
||
2. <b>总体综合分</b> = Σ(单样本综合分 × 文档权重) / Σ(文档权重)<br />
|
||
3. 当权重未配置时,代码会自动退化为默认 1.0,即普通平均。<br />
|
||
4. <code>doc_weights</code> 不仅影响总体综合分,也会影响控制台中按文档聚合后的指标均值。
|
||
</div>
|
||
|
||
<div class="grid-2" style="margin-top:18px;">
|
||
<div class="card">
|
||
<h3>代码依据</h3>
|
||
<p><code>rag_eval.metrics.weights.compute_weighted_score()</code> 负责单样本指标加权;<code>compute_overall_weighted_score_mean()</code> 负责跨样本文档加权。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h3>配置入口</h3>
|
||
<p>场景 YAML 可直接定义;Web 控制台也可通过 <code>/api/llm-profiles/apply</code> 将权重补丁写回场景文件。</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="deployment">
|
||
<h2 class="section-title">8. 部署说明</h2>
|
||
<p class="section-intro">仓库自带 <code>deploy.sh</code>,适合 Linux 一键部署 Web 控制台与相关依赖。</p>
|
||
|
||
<div class="grid-2">
|
||
<div class="card">
|
||
<h3>Linux 部署步骤</h3>
|
||
<ol class="steps">
|
||
<li>准备 Python 3.12+ 环境,执行 <code>bash deploy.sh</code>。</li>
|
||
<li>脚本会自动创建 <code>.venv</code>、安装 <code>pyproject.toml</code> 依赖,并补装 <code>fastapi / uvicorn / httpx</code>。</li>
|
||
<li>若 <code>.env</code> 不存在,将从 <code>.env.example</code> 复制一份模板。</li>
|
||
<li>脚本初始化 <code>configs / logs / outputs / datasets</code> 目录,并尝试生成 demo 数据。</li>
|
||
<li>最后使用 <code>webmain.py</code> 后台启动服务,默认端口 8800,冲突时回退到 8801。</li>
|
||
</ol>
|
||
</div>
|
||
<div class="card">
|
||
<h3>常用命令</h3>
|
||
<pre class="code-block"># 依赖安装
uv sync

# CLI 运行在线/离线评估
.\.venv\Scripts\python.exe main.py --scenario scenarios\online\siemens-pdf-question-bank-online.yaml

# CLI 运行题库生成
.\.venv\Scripts\python.exe main.py --dataset-build-config scenarios\siemens_build\siemens-pdf-build.yaml

# 启动 Web 控制台
.\.venv\Scripts\python.exe webmain.py --host 127.0.0.1 --port 8800</pre>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="pill-heading">关键 .env 配置</div>
|
||
<table>
|
||
<thead>
|
||
<tr>
|
||
<th>变量</th>
|
||
<th>用途</th>
|
||
<th>示例 / 默认</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr><td><code>OPENAI_API_KEY</code></td><td>OpenAI 兼容接口凭据</td><td><code>your-api-key</code></td></tr>
|
||
<tr><td><code>OPENAI_BASE_URL</code></td><td>统一 LLM / Embedding 网关地址</td><td><code>http://6.86.80.4:30080/v1</code></td></tr>
|
||
<tr><td><code>RAGAS_JUDGE_MODEL</code></td><td>RAGAS Judge 默认模型</td><td><code>gpt-5</code></td></tr>
|
||
<tr><td><code>RAGAS_EMBEDDING_MODEL</code></td><td>Embedding 默认模型</td><td><code>text-embedding-3-small</code></td></tr>
|
||
<tr><td><code>ALIBABA_ACCESS_KEY_ID</code> / <code>ALIBABA_ACCESS_KEY_SECRET</code></td><td>阿里云 DocMind 凭据</td><td>dataset build 必填</td></tr>
|
||
<tr><td><code>ALIBABA_ENDPOINT</code></td><td>DocMind 服务域名</td><td><code>docmind-api.cn-hangzhou.aliyuncs.com</code></td></tr>
|
||
<tr><td><code>DATASET_GENERATOR_MODEL</code></td><td>题库生成默认模型</td><td><code>qwen3.6-plus</code></td></tr>
|
||
<tr><td><code>SCORE_API_TOKEN</code></td><td><code>/api/score</code> Bearer 鉴权令牌</td><td>留空则不鉴权</td></tr>
|
||
<tr><td><code>RAGAS_METRIC_TIMEOUT_SECONDS</code></td><td>RAGAS 指标计算超时</td><td><code>300</code>(7指标建议值)</td></tr>
|
||
</tbody>
|
||
</table>
|
||
</section>
|
||
|
||
<section id="stack">
|
||
<h2 class="section-title">9. 技术栈</h2>
|
||
<p class="section-intro">从依赖声明、Web 服务实现与测试代码来看,项目的技术栈如下。</p>
|
||
|
||
<div class="stack-badges">
|
||
<span class="badge"><b>后端</b> Python 3.12 · FastAPI · RAGAS 0.4.3 · Pydantic v2 · uvicorn</span>
|
||
<span class="badge"><b>AI / ML</b> OpenAI SDK · LangChain · ragas · instructor 风格结构</span>
|
||
<span class="badge"><b>文档解析</b> 阿里云 DocMind</span>
|
||
<span class="badge"><b>前端</b> Vanilla JS · Chart.js 风格报告页</span>
|
||
<span class="badge"><b>测试</b> pytest · FastAPI TestClient</span>
|
||
<span class="badge"><b>工具链</b> uv · pyproject.toml · YAML 场景驱动</span>
|
||
</div>
|
||
|
||
<div class="grid-3" style="margin-top:18px;">
|
||
<div class="card">
|
||
<h3>依赖声明</h3>
|
||
<p><code>pyproject.toml</code> 中列出了 <code>ragas==0.4.3</code>、<code>langchain-openai</code>、<code>datasets</code>、<code>pydantic-settings</code>、阿里云 DocMind SDK 等核心依赖。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h3>Web 生态</h3>
|
||
<p>控制台基于 FastAPI + 静态页面,服务器通过 <code>webmain.py</code> 配置日志和 uvicorn,前端报告页结合图表与表格展示历史 run。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h3>测试现状</h3>
|
||
<p>仓库同时存在 <code>pytest</code> 与 <code>fastapi.testclient.TestClient</code> 用例,涵盖 Pipeline、权重聚合、实时评分与 LLM 配置接口。</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="structure">
|
||
<h2 class="section-title">10. 目录结构</h2>
|
||
<p class="section-intro">以下树状图概括了项目中最关键的源码、配置、数据、输出与测试位置。</p>
|
||
|
||
<pre class="tree">siemens_ragas/
├── README.md
├── pyproject.toml
├── main.py
├── webmain.py
├── deploy.sh
├── .env.example
├── apps/
│   ├── pdf_question_bank/
│   ├── sample_python/
│   └── siemens_pdf_qa/
├── datasets/
│   ├── raw/
│   └── normalized/
├── docs/
├── outputs/
├── scenarios/
│   ├── online/
│   ├── offline/
│   └── siemens_build/
├── tests/
│   ├── webapp/
│   ├── test_pipeline.py
│   ├── test_weights.py
│   └── test_webapp_report_builder.py
├── rag_eval/
│   ├── adapters/
│   ├── advisor/
│   ├── config/
│   ├── datasets/
│   ├── dataset_builder/
│   │   ├── generator/
│   │   └── parser/
│   ├── execution/
│   ├── metrics/
│   ├── reporting/
│   └── shared/
└── webapp/
    ├── api/
    ├── services/
    └── static/
        ├── css/
        └── js/</pre>
|
||
|
||
<div class="grid-2" style="margin-top:18px;">
|
||
<div class="card">
|
||
<h3>源码主路径</h3>
|
||
<p><code>rag_eval/</code> 是平台核心,<code>webapp/</code> 负责服务端 API 与控制台,<code>apps/</code> 放置面向不同业务应用的 adapter。</p>
|
||
</div>
|
||
<div class="card">
|
||
<h3>配置与产物</h3>
|
||
<p><code>scenarios/</code> 维护 YAML 场景,<code>datasets/</code> 存放原始/规范化数据,<code>outputs/</code> 产出评测运行结果与 dataset build 工件。</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<div class="footer">
|
||
Generated for <b>siemens_ragas</b> · Self-contained HTML overview · Siemens teal/blue documentation theme
|
||
</div>
|
||
</div>
|
||
</main>
|
||
</div>
|
||
|
||
<script>
|
||
const sidebar = document.getElementById('sidebar');
|
||
const menuToggle = document.getElementById('menu-toggle');
|
||
const backdrop = document.getElementById('sidebar-backdrop');
|
||
const navLinks = Array.from(document.querySelectorAll('.sidebar nav a'));
|
||
const sections = Array.from(document.querySelectorAll('section[id]'));
|
||
|
||
function closeSidebar() {
|
||
sidebar.classList.remove('open');
|
||
backdrop.classList.remove('show');
|
||
}
|
||
|
||
function openSidebar() {
|
||
sidebar.classList.add('open');
|
||
backdrop.classList.add('show');
|
||
}
|
||
|
||
if (menuToggle) {
|
||
menuToggle.addEventListener('click', () => {
|
||
if (sidebar.classList.contains('open')) {
|
||
closeSidebar();
|
||
} else {
|
||
openSidebar();
|
||
}
|
||
});
|
||
}
|
||
|
||
if (backdrop) {
|
||
backdrop.addEventListener('click', closeSidebar);
|
||
}
|
||
|
||
navLinks.forEach(link => {
|
||
link.addEventListener('click', () => {
|
||
if (window.innerWidth <= 980) closeSidebar();
|
||
});
|
||
});
|
||
|
||
const observer = new IntersectionObserver((entries) => {
|
||
entries.forEach(entry => {
|
||
if (!entry.isIntersecting) return;
|
||
const id = entry.target.getAttribute('id');
|
||
navLinks.forEach(link => {
|
||
const active = link.getAttribute('href') === `#${id}`;
|
||
link.classList.toggle('active', active);
|
||
});
|
||
});
|
||
}, {
|
||
rootMargin: '-20% 0px -60% 0px',
|
||
threshold: 0.1
|
||
});
|
||
|
||
sections.forEach(section => observer.observe(section));
|
||
</script>
|
||
</body>
|
||
</html>
|