diff --git a/project-overview.html b/project-overview.html
new file mode 100644
index 0000000..b029a26
--- /dev/null
+++ b/project-overview.html
@@ -0,0 +1,1101 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>Siemens RAGAS 项目总览</title>
+  <style>
+    :root { /* Design tokens (CSS custom properties) shared by the rules in this stylesheet. */
+      --siemens-teal: #009999; /* brand accent: eyebrow text, .need-gt.no pill, gradient start of title bars and buttons */
+      --siemens-blue: #0066CC; /* brand accent: stat numbers, metric headings, pill headings, gradient end */
+      --bg: #f8fafc; /* not referenced by any rule in this <style> block (body uses a literal gradient) */
+      --surface: #ffffff; /* background of <section> panels */
+      --surface-soft: #eef6f8; /* not referenced by any rule in this <style> block */
+      --text: #0f172a; /* default body text colour */
+      --muted: #475569; /* secondary text: intros, card paragraphs, list items */
+      --border: #dbe4ee; /* hairline borders on sections, cards and table cells */
+      --code-bg: #1e293b; /* background of .diagram / .code-block / .tree */
+      --code-text: #e2e8f0; /* text colour of .diagram / .code-block / .tree */
+      --shadow: 0 2px 8px rgba(0, 0, 0, 0.08); /* common drop shadow for hero, sections and cards */
+      --radius: 18px; /* not referenced by any rule in this <style> block (radii are written as literals) */
+      --sidebar-width: 260px; /* width of the fixed .sidebar and the matching left offset of .main */
+      --content-max: 1360px; /* max-width of .container */
+    }
+
+    * { box-sizing: border-box; }
+    @media (prefers-reduced-motion: no-preference) { html { scroll-behavior: smooth; } } /* animate in-page anchor jumps only when the user has not requested reduced motion */
+    body {
+      margin: 0;
+      font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+      color: var(--text);
+      background: linear-gradient(180deg, #f8fafc 0%, #eff7fb 100%);
+    }
+
+    a { color: inherit; text-decoration: none; }
+    code, pre { font-family: "Cascadia Code", "SFMono-Regular", Consolas, monospace; }
+
+    .layout {
+      display: flex;
+      min-height: 100vh;
+    }
+
+    .sidebar {
+      position: fixed;
+      inset: 0 auto 0 0;
+      width: var(--sidebar-width);
+      background: linear-gradient(180deg, #062630 0%, #083947 42%, #0a4a65 100%);
+      color: #e6fffb;
+      padding: 26px 18px 24px;
+      overflow-y: auto;
+      box-shadow: 4px 0 18px rgba(0, 0, 0, 0.12);
+      z-index: 20;
+    }
+
+    .brand {
+      margin-bottom: 22px;
+      padding-bottom: 18px;
+      border-bottom: 1px solid rgba(255, 255, 255, 0.14);
+    }
+
+    .brand h1 {
+      margin: 0;
+      font-size: 1.22rem;
+      line-height: 1.35;
+    }
+
+    .brand p {
+      margin: 10px 0 0;
+      font-size: 0.92rem;
+      color: rgba(230, 255, 251, 0.78);
+    }
+
+    .sidebar nav {
+      display: grid;
+      gap: 8px;
+    }
+
+    .sidebar nav a {
+      display: block;
+      padding: 10px 12px;
+      border-radius: 12px;
+      color: rgba(255, 255, 255, 0.82);
+      font-size: 0.95rem;
+      transition: 0.2s ease;
+      border: 1px solid transparent;
+    }
+
+    .sidebar nav a:hover,
+    .sidebar nav a.active {
+      background: rgba(255, 255, 255, 0.12);
+      color: #fff;
+      border-color: rgba(255, 255, 255, 0.18);
+      transform: translateX(2px);
+    }
+
+    .sidebar .meta {
+      margin-top: 22px;
+      padding-top: 16px;
+      border-top: 1px solid rgba(255, 255, 255, 0.14);
+      font-size: 0.84rem;
+      color: rgba(230, 255, 251, 0.72);
+      line-height: 1.7;
+    }
+
+    .main {
+      margin-left: var(--sidebar-width);
+      width: calc(100% - var(--sidebar-width));
+      padding: 28px;
+    }
+
+    .container {
+      max-width: var(--content-max);
+      margin: 0 auto;
+    }
+
+    .hero {
+      position: relative;
+      overflow: hidden;
+      background: radial-gradient(circle at top right, rgba(0, 102, 204, 0.22), transparent 34%),
+                  linear-gradient(135deg, #ffffff 0%, #effafb 55%, #e6f2ff 100%);
+      border: 1px solid rgba(0, 153, 153, 0.15);
+      border-radius: 26px;
+      padding: 34px;
+      box-shadow: var(--shadow);
+      margin-bottom: 28px;
+    }
+
+    .hero::after {
+      content: "";
+      position: absolute;
+      right: -60px;
+      top: -60px;
+      width: 220px;
+      height: 220px;
+      border-radius: 50%;
+      background: radial-gradient(circle, rgba(0, 153, 153, 0.18), transparent 70%);
+      pointer-events: none;
+    }
+
+    .eyebrow {
+      display: inline-flex;
+      align-items: center;
+      gap: 8px;
+      padding: 6px 12px;
+      border-radius: 999px;
+      background: rgba(0, 153, 153, 0.08);
+      color: var(--siemens-teal);
+      font-weight: 700;
+      font-size: 0.86rem;
+      margin-bottom: 16px;
+    }
+
+    .hero h2 {
+      margin: 0;
+      font-size: clamp(2rem, 3vw, 3.1rem);
+      line-height: 1.12;
+      letter-spacing: -0.03em;
+      max-width: 900px;
+    }
+
+    .hero p {
+      max-width: 900px;
+      font-size: 1.02rem;
+      line-height: 1.8;
+      color: var(--muted);
+      margin: 16px 0 0;
+    }
+
+    .hero-grid {
+      display: grid;
+      grid-template-columns: 1.5fr 1fr;
+      gap: 24px;
+      align-items: end;
+      margin-top: 24px;
+    }
+
+    .hero-stats {
+      display: grid;
+      grid-template-columns: repeat(4, minmax(0, 1fr));
+      gap: 14px;
+    }
+
+    .stat {
+      background: rgba(255, 255, 255, 0.8);
+      border: 1px solid rgba(0, 102, 204, 0.1);
+      border-radius: 18px;
+      padding: 16px;
+      box-shadow: var(--shadow);
+    }
+
+    .stat b {
+      display: block;
+      font-size: 1.55rem;
+      color: var(--siemens-blue);
+      margin-bottom: 6px;
+    }
+
+    .stat span {
+      color: var(--muted);
+      font-size: 0.92rem;
+    }
+
+    .hero-note {
+      background: linear-gradient(135deg, rgba(0, 153, 153, 0.12), rgba(0, 102, 204, 0.1));
+      border: 1px solid rgba(0, 153, 153, 0.14);
+      border-radius: 20px;
+      padding: 20px;
+      line-height: 1.8;
+      color: #103451;
+    }
+
+    .hero-note b { color: var(--siemens-blue); }
+
+    section {
+      background: var(--surface);
+      border: 1px solid var(--border);
+      border-radius: 22px;
+      padding: 28px;
+      box-shadow: var(--shadow);
+      margin-bottom: 24px;
+      scroll-margin-top: 18px;
+    }
+
+    .section-title {
+      display: flex;
+      align-items: center;
+      gap: 14px;
+      margin: 0 0 18px;
+      font-size: 1.5rem;
+      letter-spacing: -0.02em;
+    }
+
+    .section-title::before {
+      content: "";
+      width: 6px;
+      height: 28px;
+      border-radius: 999px;
+      background: linear-gradient(180deg, var(--siemens-teal), var(--siemens-blue));
+      flex: 0 0 auto;
+    }
+
+    .section-intro {
+      margin: 0 0 18px;
+      color: var(--muted);
+      line-height: 1.85;
+    }
+
+    .badges,
+    .stack-badges {
+      display: flex;
+      flex-wrap: wrap;
+      gap: 10px;
+    }
+
+    .badge {
+      display: inline-flex;
+      align-items: center;
+      gap: 8px;
+      padding: 9px 14px;
+      border-radius: 999px;
+      background: linear-gradient(135deg, rgba(0, 153, 153, 0.12), rgba(0, 102, 204, 0.1));
+      color: #0f4060;
+      border: 1px solid rgba(0, 153, 153, 0.15);
+      font-size: 0.93rem;
+      font-weight: 600;
+    }
+
+    .grid-2,
+    .grid-3,
+    .grid-4 {
+      display: grid;
+      gap: 18px;
+    }
+
+    .grid-2 { grid-template-columns: repeat(2, minmax(0, 1fr)); }
+    .grid-3 { grid-template-columns: repeat(3, minmax(0, 1fr)); }
+    .grid-4 { grid-template-columns: repeat(4, minmax(0, 1fr)); }
+
+    .card {
+      background: linear-gradient(180deg, #ffffff 0%, #fbfdff 100%);
+      border: 1px solid var(--border);
+      border-radius: 18px;
+      padding: 18px;
+      box-shadow: var(--shadow);
+    }
+
+    .card h3,
+    .card h4 {
+      margin: 0 0 12px;
+      font-size: 1.05rem;
+    }
+
+    .card p,
+    .card li {
+      color: var(--muted);
+      line-height: 1.8;
+      margin: 0;
+    }
+
+    .card ul {
+      margin: 0;
+      padding-left: 20px;
+    }
+
+    .card .mini {
+      font-size: 0.86rem;
+      color: #64748b;
+      margin-top: 8px;
+    }
+
+    .diagram,
+    .code-block,
+    .tree {
+      background: var(--code-bg);
+      color: var(--code-text);
+      padding: 20px;
+      border-radius: 18px;
+      overflow-x: auto;
+      border: 1px solid rgba(148, 163, 184, 0.16);
+      box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.04);
+    }
+
+    .diagram { font-size: 0.95rem; line-height: 1.5; }
+    .code-block { line-height: 1.7; }
+    .tree { line-height: 1.65; }
+
+    .pill-heading {
+      display: inline-flex;
+      align-items: center;
+      gap: 8px;
+      padding: 6px 12px;
+      border-radius: 999px;
+      background: rgba(0, 102, 204, 0.08);
+      color: var(--siemens-blue);
+      font-weight: 700;
+      margin: 14px 0 10px;
+      font-size: 0.9rem;
+    }
+
+    table { /* Data tables: full width with a rounded 1px outer border. */
+      width: 100%;
+      border-collapse: separate; border-spacing: 0; /* border-radius is not applied to collapsed-border tables; separate borders with zero spacing leave no gaps between cells */
+      overflow: hidden; /* clips the header-cell gradient to the rounded corners */
+      border-radius: 16px;
+      border: 1px solid var(--border);
+    }
+
+    th,
+    td {
+      padding: 14px 14px;
+      text-align: left;
+      vertical-align: top;
+      border-bottom: 1px solid var(--border);
+      line-height: 1.7;
+      font-size: 0.95rem;
+    }
+
+    th {
+      background: linear-gradient(180deg, #edf8f8, #e8f1ff);
+      color: #103451;
+      font-size: 0.92rem;
+    }
+
+    tr:last-child td { border-bottom: none; }
+
+    .flow-grid {
+      display: grid;
+      gap: 16px;
+    }
+
+    .flow-card {
+      border: 1px solid var(--border);
+      border-radius: 18px;
+      padding: 18px;
+      background: linear-gradient(180deg, #ffffff 0%, #fbfdff 100%);
+    }
+
+    .flow-card h3 {
+      margin: 0 0 12px;
+      font-size: 1.06rem;
+    }
+
+    .flow-card pre {
+      margin: 0;
+      white-space: pre-wrap;
+      word-break: break-word;
+      line-height: 1.85;
+    }
+
+    .metric-card {
+      border: 1px solid var(--border);
+      border-radius: 18px;
+      padding: 18px;
+      background: linear-gradient(180deg, #ffffff 0%, #fafdff 100%);
+      box-shadow: var(--shadow);
+    }
+
+    .metric-card h3 {
+      margin: 0 0 10px;
+      font-size: 1.02rem;
+      color: var(--siemens-blue);
+    }
+
+    .metric-card .need-gt {
+      display: inline-block;
+      padding: 4px 10px;
+      border-radius: 999px;
+      font-size: 0.82rem;
+      font-weight: 700;
+      margin-bottom: 12px;
+    }
+
+    .need-gt.yes { background: rgba(0, 102, 204, 0.1); color: var(--siemens-blue); }
+    .need-gt.no { background: rgba(0, 153, 153, 0.12); color: var(--siemens-teal); }
+
+    .formula {
+      background: linear-gradient(180deg, #f3fbfb 0%, #eef5ff 100%);
+      border: 1px solid rgba(0, 102, 204, 0.14);
+      color: #0f4060;
+      padding: 18px;
+      border-radius: 18px;
+      line-height: 1.9;
+    }
+
+    .steps {
+      margin: 0;
+      padding-left: 18px;
+      color: var(--muted);
+      line-height: 1.9;
+    }
+
+    .muted { color: var(--muted); }
+    .small { font-size: 0.9rem; }
+
+    .footer {
+      text-align: center;
+      color: #64748b;
+      font-size: 0.88rem;
+      padding: 8px 0 24px;
+    }
+
+    .mobile-topbar { /* Sticky bar holding the menu button; hidden here and switched to flex by the max-width: 980px media query. */
+      display: none;
+      position: sticky;
+      top: 0;
+      z-index: 25;
+      background: rgba(248, 250, 252, 0.92);
+      -webkit-backdrop-filter: blur(10px); backdrop-filter: blur(10px); /* prefixed form first: Safari before 18 only recognises -webkit-backdrop-filter */
+      border-bottom: 1px solid rgba(15, 23, 42, 0.08);
+      padding: 12px 16px;
+      margin: -28px -28px 18px; /* negative margins cancel .main's 28px padding so the bar spans edge to edge */
+    }
+
+    .mobile-topbar button {
+      border: none;
+      background: linear-gradient(135deg, var(--siemens-teal), var(--siemens-blue));
+      color: #fff;
+      border-radius: 12px;
+      padding: 10px 14px;
+      font-weight: 700;
+      cursor: pointer;
+    }
+
+    .sidebar-backdrop {
+      display: none;
+      position: fixed;
+      inset: 0;
+      background: rgba(15, 23, 42, 0.42);
+      z-index: 15;
+    }
+
+    @media (max-width: 1180px) {
+      /* Medium widths: stack the hero so the four-column .hero-stats row keeps the full hero width */
+      /* (two equal hero columns squeezed each .stat card to roughly 60-90px), and halve the .grid-4 columns. */
+      .hero-grid { grid-template-columns: 1fr; }
+      .grid-4 { grid-template-columns: repeat(2, minmax(0, 1fr)); }
+    }
+
+    @media (max-width: 980px) { /* Narrow layout: off-canvas sidebar, visible top bar, single-column grids. */
+      .sidebar {
+        transform: translateX(-100%); /* parked fully off-screen to the left */
+        transition: transform 0.24s ease;
+      }
+      .sidebar.open { transform: translateX(0); } /* .open slides it back in; presumably toggled by the page script (not visible in this chunk) - confirm */
+      .sidebar-backdrop.show { display: block; } /* .show reveals the dimming overlay, which is display: none by default */
+      .main {
+        margin-left: 0; /* drop the offset that reserved room for the fixed sidebar */
+        width: 100%;
+      }
+      .mobile-topbar { display: flex; justify-content: space-between; align-items: center; } /* the bar is display: none above this breakpoint */
+      .grid-2,
+      .grid-3,
+      .grid-4,
+      .hero-grid,
+      .hero-stats {
+        grid-template-columns: 1fr; /* every multi-column grid collapses to one column */
+      }
+    }
+
+    @media (max-width: 640px) {
+      .main { padding: 18px; }
+      .hero, section { padding: 20px; }
+      .mobile-topbar { margin: -18px -18px 16px; }
+      th, td { padding: 12px 10px; }
+    }
+  </style>
+</head>
+<body>
+  <div class="sidebar-backdrop" id="sidebar-backdrop"></div>
+  <div class="layout">
+    <aside class="sidebar" id="sidebar">
+      <div class="brand">
+        <h1>Siemens RAGAS<br />项目文档</h1>
+        <p>西门子医疗影像 CT 知识库 RAG 评估平台</p>
+      </div>
+      <nav>
+        <a href="#overview">1. 项目概述</a>
+        <a href="#architecture">2. 系统架构</a>
+        <a href="#modules">3. 核心模块说明</a>
+        <a href="#flows">4. 数据流说明</a>
+        <a href="#metrics">5. RAGAS 评估指标</a>
+        <a href="#apis">6. API 接口文档</a>
+        <a href="#weights">7. 指标权重配置</a>
+        <a href="#deployment">8. 部署说明</a>
+        <a href="#stack">9. 技术栈</a>
+        <a href="#structure">10. 目录结构</a>
+      </nav>
+      <div class="meta">
+        <div><b>生成时间</b><br />2026-06-24</div>
+        <div style="margin-top:10px;"><b>输出文件</b><br />project-overview.html</div>
+        <div style="margin-top:10px;"><b>来源</b><br />README / pyproject / main.py / webmain.py / rag_eval / webapp / scenarios / .env.example</div>
+      </div>
+    </aside>
+
+    <main class="main">
+      <div class="mobile-topbar">
+        <strong>Siemens RAGAS</strong>
+        <button id="menu-toggle" type="button">目录</button>
+      </div>
+
+      <div class="container">
+        <header class="hero">
+          <div class="eyebrow">Siemens Healthineers · RAG Evaluation Platform</div>
+          <h2>Siemens RAGAS RAG 评估平台：面向 CT 知识库的自动化质量评估闭环</h2>
+          <p>
+            本项目将 <b>PDF 文档解析</b>、<b>题库生成</b>、<b>在线/离线 RAGAS 评测</b>、<b>报告沉淀与 Web 可视化</b>
+            统一进一个可复用平台。CLI 与 FastAPI Web 控制台共享同一套 <code>rag_eval</code> 核心引擎，适合批量评估、持续优化与 Dify 实时评分集成。
+          </p>
+          <div class="hero-grid">
+            <div>
+              <div class="hero-stats">
+                <div class="stat"><b>3</b><span>入口形态<br />CLI / Web / REST API</span></div>
+                <div class="stat"><b>7</b><span>RAGAS 指标<br />含 GT 依赖与非依赖</span></div>
+                <div class="stat"><b>4</b><span>核心流程<br />Build / Eval / Pipeline / Score</span></div>
+                <div class="stat"><b>2</b><span>适配模式<br />HTTP / Python Adapter</span></div>
+              </div>
+            </div>
+            <div class="hero-note">
+              <b>核心价值：</b>将 PDF 资料转成可评测题库，再以 Siemens 医疗影像场景为中心完成答题、打分、加权汇总与报告产物沉淀，形成完整质量治理闭环。
+            </div>
+          </div>
+        </header>
+
+        <section id="overview">
+          <h2 class="section-title">1. 项目概述</h2>
+          <p class="section-intro">
+            <b>项目名称：</b>Siemens RAGAS RAG 评估平台。<br />
+            <b>目标：</b>为西门子医疗影像 CT 知识库 RAG 系统提供自动化质量评估。<br />
+            <b>定位：</b>既能作为离线评测框架，也能作为在线评估控制台与 API 服务，为知识库 QA、Prompt 迭代、检索策略优化提供统一基线。
+          </p>
+
+          <div class="grid-2">
+            <div class="card">
+              <h3>业务闭环</h3>
+              <p>PDF解析 → 题库生成 → RAGAS评测 → 报告可视化 → 再迭代。项目不仅覆盖评测本身，还覆盖评测数据源建设与运行产物管理。</p>
+            </div>
+            <div class="card">
+              <h3>运行方式</h3>
+              <p><code>main.py</code> 负责 CLI 评估与 dataset build，<code>webmain.py</code> 负责启动 FastAPI 控制台，<code>webapp.server</code> 暴露 REST API 与静态前端。</p>
+            </div>
+          </div>
+
+          <div class="pill-heading">技术亮点</div>
+          <div class="badges">
+            <span class="badge">统一 CLI / Web / API 三入口</span>
+            <span class="badge">阿里云 DocMind 文档解析</span>
+            <span class="badge">OpenAI 兼容模型接入</span>
+            <span class="badge">RAGAS 0.4.3 指标流水线</span>
+            <span class="badge">在线 / 离线双模式评估</span>
+            <span class="badge">Python / HTTP Adapter 扩展机制</span>
+            <span class="badge">场景 YAML 驱动</span>
+            <span class="badge">Pipeline 后台线程编排</span>
+            <span class="badge">Dify 实时单题评分接口</span>
+            <span class="badge">metric_weights + doc_weights 加权汇总</span>
+            <span class="badge">历史 run 资产沉淀</span>
+            <span class="badge">Web 报告聚合与分布分析</span>
+          </div>
+        </section>
+
+        <section id="architecture">
+          <h2 class="section-title">2. 系统架构</h2>
+          <p class="section-intro">
+            平台采用“多入口 + 单评估核心”的结构。CLI 和 Web 控制台都汇入 <code>rag_eval</code> 核心引擎；API 层只负责任务编排、配置管理与结果查询。
+          </p>
+
+          <pre class="diagram">┌─────────────────────────────────────────────────────────┐
+│                    siemens_ragas 平台                    │
+├─────────────┬───────────────────┬───────────────────────┤
+│  CLI 入口   │   Web 控制台      │   REST API            │
+│  main.py    │   webmain.py      │   FastAPI             │
+├─────────────┴───────────────────┴───────────────────────┤
+│                    核心评估引擎 (rag_eval)                │
+├──────────────┬──────────────────┬────────────────────────┤
+│ dataset_     │   execution/     │   metrics/             │
+│ builder/     │   evaluator.py   │   pipeline.py          │
+│ (PDF→题库)   │   (评估流程)     │   (RAGAS指标)          │
+├──────────────┴──────────────────┴────────────────────────┤
+│              外部依赖                                     │
+│  阿里云DocMind (PDF解析) │ OpenAI兼容API (LLM/Embedding) │
+└─────────────────────────────────────────────────────────┘</pre>
+
+          <div class="grid-3" style="margin-top:18px;">
+            <div class="card">
+              <h3>CLI 编排</h3>
+              <p><code>main.py</code> 通过互斥参数在 <code>--scenario</code> 与 <code>--dataset-build-config</code> 之间分派，分别进入评估流程与题库构建流程。</p>
+            </div>
+            <div class="card">
+              <h3>Web 服务</h3>
+              <p><code>webmain.py</code> 负责 uvicorn 启动、日志文件轮转与 host/port 配置；<code>webapp.server</code> 注册 runs、scenarios、evaluations、pipeline、score 等 API。</p>
+            </div>
+            <div class="card">
+              <h3>核心执行器</h3>
+              <p><code>rag_eval.execution.runner</code> 负责加载 scenario、构建模型与 adapter、调用 <code>Evaluator</code> 执行并写出标准化产物。</p>
+            </div>
+          </div>
+        </section>
+
+        <section id="modules">
+          <h2 class="section-title">3. 核心模块说明</h2>
+          <p class="section-intro">以下模块覆盖数据准备、评估执行、Web 管理与 Siemens 业务适配的主要职责边界。</p>
+
+          <table>
+            <thead>
+              <tr>
+                <th>模块</th>
+                <th>路径</th>
+                <th>职责</th>
+              </tr>
+            </thead>
+            <tbody>
+              <tr>
+                <td><b>dataset_builder</b></td>
+                <td><code>rag_eval/dataset_builder/</code></td>
+                <td>PDF解析、source chunk 归一化、LLM 题目生成、草稿题库与构建产物写出。</td>
+              </tr>
+              <tr>
+                <td><b>execution</b></td>
+                <td><code>rag_eval/execution/</code></td>
+                <td>评估编排、在线/离线模式切换、adapter 调用、RAGAS 打分与结果聚合。</td>
+              </tr>
+              <tr>
+                <td><b>metrics</b></td>
+                <td><code>rag_eval/metrics/</code></td>
+                <td>RAGAS 指标注册、模型构建、评估管道装配、指标权重与文档权重聚合。</td>
+              </tr>
+              <tr>
+                <td><b>reporting</b></td>
+                <td><code>rag_eval/reporting/</code></td>
+                <td>运行产物写入、summary 生成、metadata 与 scenario 快照沉淀。</td>
+              </tr>
+              <tr>
+                <td><b>adapters</b></td>
+                <td><code>rag_eval/adapters/</code></td>
+                <td>HTTP/Python 应用适配器封装，把外部应用结果统一为 <code>answer / contexts / raw_response</code>。</td>
+              </tr>
+              <tr>
+                <td><b>webapp</b></td>
+                <td><code>webapp/</code></td>
+                <td>FastAPI Web 控制台、OpenAPI 文档、任务后台管理、场景扫描、历史报告查询。</td>
+              </tr>
+              <tr>
+                <td><b>apps</b></td>
+                <td><code>apps/siemens_pdf_qa/</code></td>
+                <td>西门子 CT 知识库问答适配器，基于 source chunk 证据构造 Prompt 并调用 OpenAI 兼容模型生成答案。</td>
+              </tr>
+            </tbody>
+          </table>
+
+          <div class="grid-3" style="margin-top:18px;">
+            <div class="card">
+              <h3>settings.py</h3>
+              <p>集中读取 <code>.env</code>：OpenAI Key/Base URL、RAGAS Judge/Embedding 模型、并发、阿里云 DocMind、<code>SCORE_API_TOKEN</code> 等。</p>
+            </div>
+            <div class="card">
+              <h3>registry.py</h3>
+              <p>定义 7 个受支持指标：faithfulness、answer_relevancy、context_recall、context_precision、noise_sensitivity、factual_correctness、semantic_similarity。</p>
+            </div>
+            <div class="card">
+              <h3>inline_scorer.py</h3>
+              <p>为 <code>/api/score</code> 提供模块级缓存评分器，按 <code>(judge_model, embedding_model)</code> 复用 LLM 与 embedding 连接。</p>
+            </div>
+          </div>
+        </section>
+
+        <section id="flows">
+          <h2 class="section-title">4. 数据流说明</h2>
+          <p class="section-intro">项目围绕四条关键流程展开：题库构建、在线评估、API 全链路 Pipeline 与 Dify 实时评分。</p>
+
+          <div class="flow-grid">
+            <div class="flow-card">
+              <h3>Flow A: 题库生成流程（Dataset Build）</h3>
+              <pre>PDF文件 → 阿里云DocMind解析 → 文档切片(source_chunks)
+→ LLM生成题目 → CSV题库文件 → 人工审核</pre>
+              <p class="small muted" style="margin-top:12px;">对应入口：<code>main.py --dataset-build-config</code>；核心实现：<code>rag_eval.dataset_builder.runner</code>。</p>
+            </div>
+
+            <div class="flow-card">
+              <h3>Flow B: RAGAS评估流程（Online Evaluation）</h3>
+              <pre>CSV题库 → 规范化样本 → 应用适配器(siemens_pdf_qa)
+→ LLM答题 → RAGAS指标计算 → 加权得分 → 报告产物</pre>
+              <p class="small muted" style="margin-top:12px;">对应 Siemens 场景：<code>scenarios/online/siemens-pdf-question-bank-online.yaml</code>，由 <code>apps.siemens_pdf_qa.adapter:run</code> 提供答案与证据片段。</p>
+            </div>
+
+            <div class="flow-card">
+              <h3>Flow C: 全链路 Pipeline（API触发）</h3>
+              <pre>POST /api/pipeline/jobs → 后台线程 → Flow A → Flow B → 产物路径</pre>
+              <p class="small muted" style="margin-top:12px;">由 <code>webapp.services.pipeline_task_manager</code> 在线程池中串行执行 <code>parsing_documents → generating_questions → evaluating</code> 三阶段，并返回 <code>scores.csv / summary.md / dataset.csv</code> 等路径。</p>
+            </div>
+
+            <div class="flow-card">
+              <h3>Flow D: Dify实时评分（/api/score）</h3>
+              <pre>Dify Agent → POST /api/score → InlineScorer → RAGAS metrics → 得分JSON</pre>
+              <p class="small muted" style="margin-top:12px;">当 <code>ground_truth</code> 缺失时，会自动跳过依赖参考答案的指标，并在响应中给出 <code>skipped_metrics</code>。</p>
+            </div>
+          </div>
+        </section>
+
+        <section id="metrics">
+          <h2 class="section-title">5. RAGAS 评估指标</h2>
+          <p class="section-intro">平台当前支持 7 个指标，既覆盖回答忠实度和相关性，也覆盖对参考答案、噪声片段和语义相似度的衡量。</p>
+
+          <div class="grid-4">
+            <div class="metric-card">
+              <h3>faithfulness</h3>
+              <span class="need-gt no">无需 ground_truth</span>
+              <p>回答对检索内容的忠实度，用于防止模型脱离证据“幻觉式”作答。</p>
+            </div>
+            <div class="metric-card">
+              <h3>answer_relevancy</h3>
+              <span class="need-gt no">无需 ground_truth</span>
+              <p>衡量回答与问题本身的相关性，判断是否真正命中用户所问。</p>
+            </div>
+            <div class="metric-card">
+              <h3>context_precision</h3>
+              <span class="need-gt no">无需 ground_truth</span>
+              <p>衡量检索片段的精准度，关注上下文中与回答真正相关的比例。</p>
+            </div>
+            <div class="metric-card">
+              <h3>context_recall</h3>
+              <span class="need-gt yes">需要 ground_truth</span>
+              <p>检索内容对标准答案的覆盖程度，反映召回是否足够支撑正确作答。</p>
+            </div>
+            <div class="metric-card">
+              <h3>noise_sensitivity</h3>
+              <span class="need-gt yes">需要 ground_truth</span>
+              <p>系统对噪声检索片段的鲁棒性，越不受无关片段干扰越好。</p>
+            </div>
+            <div class="metric-card">
+              <h3>factual_correctness</h3>
+              <span class="need-gt yes">需要 ground_truth</span>
+              <p>与标准答案对齐的事实准确性，代码中作为端到端事实正确度指标。</p>
+            </div>
+            <div class="metric-card">
+              <h3>semantic_similarity</h3>
+              <span class="need-gt yes">需要 ground_truth</span>
+              <p>回答与标准答案的语义相似度，依赖 embedding，不需要额外 LLM 调用。</p>
+            </div>
+            <div class="metric-card">
+              <h3>指标默认集合</h3>
+              <span class="need-gt no">在线评分默认</span>
+              <p><code>/api/score</code> 的默认指标集合为 faithfulness、answer_relevancy、context_recall、context_precision。</p>
+            </div>
+          </div>
+        </section>
+
+        <section id="apis">
+          <h2 class="section-title">6. API 接口文档</h2>
+          <p class="section-intro">Web 层基于 FastAPI 提供任务提交、历史查询、LLM 配置管理与 Dify 实时评分接口。</p>
+
+          <table>
+            <thead>
+              <tr>
+                <th>方法</th>
+                <th>路径</th>
+                <th>说明</th>
+              </tr>
+            </thead>
+            <tbody>
+              <tr><td><b>POST</b></td><td><code>/api/pipeline/jobs</code></td><td>提交全链路评估任务，后台线程自动执行 PDF 解析、题库生成和在线评估。</td></tr>
+              <tr><td><b>GET</b></td><td><code>/api/pipeline/jobs/{id}</code></td><td>查询指定 Pipeline 任务状态、阶段、日志和产物路径。</td></tr>
+              <tr><td><b>POST</b></td><td><code>/api/score</code></td><td>Dify 实时评分接口，接收单条问答并返回各指标得分与综合得分。</td></tr>
+              <tr><td><b>POST</b></td><td><code>/api/llm-profiles/probe</code></td><td>临时测试 LLM / Embedding 连通性，无需先保存配置。</td></tr>
+              <tr><td><b>POST</b></td><td><code>/api/llm-profiles</code></td><td>创建命名的 LLM 配置档案，可供场景或控制台复用。</td></tr>
+              <tr><td><b>POST</b></td><td><code>/api/evaluations</code></td><td>基于已有场景 YAML 启动一次后台评估任务。</td></tr>
+              <tr><td><b>GET</b></td><td><code>/api/runs</code></td><td>获取历史运行列表，用于控制台报告页与明细页渲染。</td></tr>
+            </tbody>
+          </table>
+
+          <div class="grid-2" style="margin-top:18px;">
+            <div class="card">
+              <h3>/api/score 请求示例</h3>
+              <pre class="code-block">POST /api/score
+Authorization: Bearer &lt;token&gt;
+Content-Type: application/json
+
+{
+  "question": "双源CT的时间分辨率是多少?",
+  "answer": "双源CT的单扇区时间分辨率为75ms。",
+  "contexts": "双源CT采用两套管-探测器系统 |||| 单扇区采集旋转135度",
+  "ground_truth": "双源CT单扇区时间分辨率为75ms，需旋转135度。",
+  "context_separator": " |||| ",
+  "metrics": [
+    "faithfulness",
+    "answer_relevancy",
+    "context_recall",
+    "context_precision"
+  ],
+  "judge_model": "gpt-5",
+  "embedding_model": "text-embedding-3-small"
+}</pre>
+            </div>
+            <div class="card">
+              <h3>/api/score 响应示例</h3>
+              <pre class="code-block">{
+  "scores": {
+    "faithfulness": 0.875,
+    "answer_relevancy": 0.92,
+    "context_recall": 0.81,
+    "context_precision": 0.85
+  },
+  "weighted_score": 0.8638,
+  "latency_ms": 3420,
+  "skipped_metrics": [],
+  "error": null
+}</pre>
+            </div>
+          </div>
+
+          <div class="grid-3" style="margin-top:18px;">
+            <div class="card">
+              <h3>Pipeline API</h3>
+              <p>返回 <code>job_id</code> 后即可轮询。阶段枚举包括 <code>queued</code>、<code>running</code>、<code>parsing_documents</code>、<code>generating_questions</code>、<code>evaluating</code> 与 <code>done</code>。</p>
+            </div>
+            <div class="card">
+              <h3>LLM Profiles</h3>
+              <p>支持保存 base URL、API Key、model、timeout，并通过 <code>/api/llm-profiles/apply</code> 将配置写回场景 YAML，同时可补充 metric/doc 权重。</p>
+            </div>
+            <div class="card">
+              <h3>Runs API</h3>
+              <p>历史 run 会读取 <code>summary.md</code>、<code>scores.csv</code> 与 <code>scenario.snapshot.yaml</code>，聚合指标均值、分布和最低分样本。</p>
+            </div>
+          </div>
+        </section>
+
+        <section id="weights">
+          <h2 class="section-title">7. 指标权重配置</h2>
+          <p class="section-intro">平台支持两级权重：<code>metric_weights</code> 控制不同指标的重要性，<code>doc_weights</code> 控制不同文档对总体分数的影响。</p>
+
+          <pre class="code-block">metric_weights:
+  faithfulness: 0.35
+  context_recall: 0.25
+  context_precision: 0.20
+  answer_relevancy: 0.20
+
+doc_weights:
+  "322_双源CT.pdf": 2.0</pre>
+
+          <div class="formula" style="margin-top:18px;">
+            <b>weighted_score 计算逻辑：</b><br />
+            1. <b>单样本综合分</b> = Σ(有效指标分 × 指标权重) / Σ(有效指标权重)<br />
+            2. <b>总体综合分</b> = Σ(单样本综合分 × 文档权重) / Σ(文档权重)<br />
+            3. 当权重未配置时，代码会自动退化为默认 1.0，即普通平均。<br />
+            4. <code>doc_weights</code> 不仅影响总体综合分，也会影响控制台中按文档聚合后的指标均值。
+          </div>
+
+          <div class="grid-2" style="margin-top:18px;">
+            <div class="card">
+              <h3>代码依据</h3>
+              <p><code>rag_eval.metrics.weights.compute_weighted_score()</code> 负责单样本指标加权；<code>compute_overall_weighted_score_mean()</code> 负责跨样本文档加权。</p>
+            </div>
+            <div class="card">
+              <h3>配置入口</h3>
+              <p>场景 YAML 可直接定义；Web 控制台也可通过 <code>/api/llm-profiles/apply</code> 将权重补丁写回场景文件。</p>
+            </div>
+          </div>
+        </section>
+
+        <section id="deployment">
+          <h2 class="section-title">8. 部署说明</h2>
+          <p class="section-intro">仓库自带 <code>deploy.sh</code>，适合 Linux 一键部署 Web 控制台与相关依赖。</p>
+
+          <div class="grid-2">
+            <div class="card">
+              <h3>Linux 部署步骤</h3>
+              <ol class="steps">
+                <li>准备 Python 3.12+ 环境，执行 <code>bash deploy.sh</code>。</li>
+                <li>脚本会自动创建 <code>.venv</code>、安装 <code>pyproject.toml</code> 依赖，并补装 <code>fastapi / uvicorn / httpx</code>。</li>
+                <li>若 <code>.env</code> 不存在，将从 <code>.env.example</code> 复制一份模板。</li>
+                <li>脚本初始化 <code>configs / logs / outputs / datasets</code> 目录，并尝试生成 demo 数据。</li>
+                <li>最后使用 <code>webmain.py</code> 后台启动服务，默认端口 8800，冲突时回退到 8801。</li>
+              </ol>
+            </div>
+            <div class="card">
+              <h3>常用命令（Windows 路径示例；Linux 下改用 .venv/bin/python）</h3>
+              <pre class="code-block"># 依赖安装
+uv sync
+
+# CLI 运行在线/离线评估
+.\.venv\Scripts\python.exe main.py --scenario scenarios\online\siemens-pdf-question-bank-online.yaml
+
+# CLI 运行题库生成
+.\.venv\Scripts\python.exe main.py --dataset-build-config scenarios\siemens_build\siemens-pdf-build.yaml
+
+# 启动 Web 控制台
+.\.venv\Scripts\python.exe webmain.py --host 127.0.0.1 --port 8800</pre>
+            </div>
+          </div>
+
+          <div class="pill-heading">关键 .env 配置</div>
+          <table>
+            <thead>
+              <tr>
+                <th>变量</th>
+                <th>用途</th>
+                <th>示例 / 默认</th>
+              </tr>
+            </thead>
+            <tbody>
+              <tr><td><code>OPENAI_API_KEY</code></td><td>OpenAI 兼容接口凭据</td><td><code>your-api-key</code></td></tr>
+              <tr><td><code>OPENAI_BASE_URL</code></td><td>统一 LLM / Embedding 网关地址</td><td><code>http://6.86.80.4:30080/v1</code></td></tr>
+              <tr><td><code>RAGAS_JUDGE_MODEL</code></td><td>RAGAS Judge 默认模型</td><td><code>gpt-5</code></td></tr>
+              <tr><td><code>RAGAS_EMBEDDING_MODEL</code></td><td>Embedding 默认模型</td><td><code>text-embedding-3-small</code></td></tr>
+              <tr><td><code>ALIBABA_ACCESS_KEY_ID</code> / <code>ALIBABA_ACCESS_KEY_SECRET</code></td><td>阿里云 DocMind 凭据</td><td>dataset build 必填</td></tr>
+              <tr><td><code>ALIBABA_ENDPOINT</code></td><td>DocMind 服务域名</td><td><code>docmind-api.cn-hangzhou.aliyuncs.com</code></td></tr>
+              <tr><td><code>DATASET_GENERATOR_MODEL</code></td><td>题库生成默认模型</td><td><code>qwen3.6-plus</code></td></tr>
+              <tr><td><code>SCORE_API_TOKEN</code></td><td><code>/api/score</code> Bearer 鉴权令牌</td><td>留空则不鉴权</td></tr>
+              <tr><td><code>RAGAS_METRIC_TIMEOUT_SECONDS</code></td><td>RAGAS 指标计算超时</td><td><code>300</code>（7指标建议值）</td></tr>
+            </tbody>
+          </table>
+        </section>
+
+        <section id="stack">
+          <h2 class="section-title">9. 技术栈</h2>
+          <p class="section-intro">从依赖声明、Web 服务实现与测试代码来看，项目的技术栈如下。</p>
+
+          <div class="stack-badges">
+            <span class="badge"><b>后端</b> Python 3.12 · FastAPI · RAGAS 0.4.3 · Pydantic v2 · uvicorn</span>
+            <span class="badge"><b>AI / ML</b> OpenAI SDK · LangChain · ragas · instructor 风格结构</span>
+            <span class="badge"><b>文档解析</b> 阿里云 DocMind</span>
+            <span class="badge"><b>前端</b> Vanilla JS · Chart.js 风格报告页</span>
+            <span class="badge"><b>测试</b> pytest · FastAPI TestClient</span>
+            <span class="badge"><b>工具链</b> uv · pyproject.toml · YAML 场景驱动</span>
+          </div>
+
+          <div class="grid-3" style="margin-top:18px;">
+            <div class="card">
+              <h3>依赖声明</h3>
+              <p><code>pyproject.toml</code> 中列出了 <code>ragas==0.4.3</code>、<code>langchain-openai</code>、<code>datasets</code>、<code>pydantic-settings</code>、阿里云 DocMind SDK 等核心依赖。</p>
+            </div>
+            <div class="card">
+              <h3>Web 生态</h3>
+              <p>控制台基于 FastAPI + 静态页面，服务器通过 <code>webmain.py</code> 配置日志和 uvicorn，前端报告页结合图表与表格展示历史 run。</p>
+            </div>
+            <div class="card">
+              <h3>测试现状</h3>
+              <p>仓库同时存在 <code>pytest</code> 与 <code>fastapi.testclient.TestClient</code> 用例，涵盖 Pipeline、权重聚合、实时评分与 LLM 配置接口。</p>
+            </div>
+          </div>
+        </section>
+
+        <section id="structure">
+          <h2 class="section-title">10. 目录结构</h2>
+          <p class="section-intro">以下树状图概括了项目中最关键的源码、配置、数据、输出与测试位置。</p>
+
+          <pre class="tree">siemens_ragas/
+├── README.md
+├── pyproject.toml
+├── main.py
+├── webmain.py
+├── deploy.sh
+├── .env.example
+├── apps/
+│   ├── pdf_question_bank/
+│   ├── sample_python/
+│   └── siemens_pdf_qa/
+├── datasets/
+│   ├── raw/
+│   └── normalized/
+├── docs/
+├── outputs/
+├── scenarios/
+│   ├── online/
+│   ├── offline/
+│   └── siemens_build/
+├── tests/
+│   ├── webapp/
+│   ├── test_pipeline.py
+│   ├── test_weights.py
+│   └── test_webapp_report_builder.py
+├── rag_eval/
+│   ├── adapters/
+│   ├── advisor/
+│   ├── config/
+│   ├── datasets/
+│   ├── dataset_builder/
+│   │   ├── generator/
+│   │   └── parser/
+│   ├── execution/
+│   ├── metrics/
+│   ├── reporting/
+│   └── shared/
+└── webapp/
+    ├── api/
+    ├── services/
+    └── static/
+        ├── css/
+        └── js/</pre>
+
+          <div class="grid-2" style="margin-top:18px;">
+            <div class="card">
+              <h3>源码主路径</h3>
+              <p><code>rag_eval/</code> 是平台核心，<code>webapp/</code> 负责服务端 API 与控制台，<code>apps/</code> 放置面向不同业务应用的 adapter。</p>
+            </div>
+            <div class="card">
+              <h3>配置与产物</h3>
+              <p><code>scenarios/</code> 维护 YAML 场景，<code>datasets/</code> 存放原始/规范化数据，<code>outputs/</code> 产出评测运行结果与 dataset build 工件。</p>
+            </div>
+          </div>
+        </section>
+
+        <div class="footer">
+          Generated for <b>siemens_ragas</b> · Self-contained HTML overview · Siemens teal/blue documentation theme
+        </div>
+      </div>
+    </main>
+  </div>
+
+  <script>
+    const sidebar = document.getElementById('sidebar');
+    const menuToggle = document.getElementById('menu-toggle');
+    const backdrop = document.getElementById('sidebar-backdrop');
+    const navLinks = Array.from(document.querySelectorAll('.sidebar nav a'));
+    const sections = Array.from(document.querySelectorAll('section[id]'));
+
+    function closeSidebar() {
+      sidebar.classList.remove('open');
+      backdrop.classList.remove('show');
+    }
+
+    function openSidebar() {
+      sidebar.classList.add('open');
+      backdrop.classList.add('show');
+    }
+
+    if (menuToggle) {
+      menuToggle.addEventListener('click', () => {
+        if (sidebar.classList.contains('open')) {
+          closeSidebar();
+        } else {
+          openSidebar();
+        }
+      });
+    }
+
+    if (backdrop) {
+      backdrop.addEventListener('click', closeSidebar);
+    }
+
+    navLinks.forEach(link => {
+      link.addEventListener('click', () => {
+        if (window.innerWidth <= 980) closeSidebar();
+      });
+    });
+
+    const observer = new IntersectionObserver((entries) => {
+      entries.forEach(entry => {
+        if (!entry.isIntersecting) return;
+        const id = entry.target.getAttribute('id');
+        navLinks.forEach(link => {
+          const active = link.getAttribute('href') === `#${id}`;
+          link.classList.toggle('active', active);
+        });
+      });
+    }, {
+      rootMargin: '-20% 0px -60% 0px',
+      threshold: 0.1
+    });
+
+    sections.forEach(section => observer.observe(section));
+  </script>
+</body>
+</html>

模块	路径	职责
dataset_builder	`rag_eval/dataset_builder/`	PDF 解析、source chunk 归一化、LLM 题目生成、草稿题库与构建产物写出。
execution	`rag_eval/execution/`	评估编排、在线/离线模式切换、adapter 调用、RAGAS 打分与结果聚合。
metrics	`rag_eval/metrics/`	RAGAS 指标注册、模型构建、评估管道装配、指标权重与文档权重聚合。
reporting	`rag_eval/reporting/`	运行产物写入、summary 生成、metadata 与 scenario 快照沉淀。
adapters	`rag_eval/adapters/`	HTTP/Python 应用适配器封装，把外部应用结果统一为 `answer / contexts / raw_response`。
webapp	`webapp/`	FastAPI Web 控制台、OpenAPI 文档、任务后台管理、场景扫描、历史报告查询。
apps	`apps/siemens_pdf_qa/`	西门子 CT 知识库问答适配器，基于 source chunk 证据构造 Prompt 并调用 OpenAI 兼容模型生成答案。
方法	路径	说明
POST	`/api/pipeline/jobs`	提交全链路评估任务，后台线程自动执行 PDF 解析、题库生成和在线评估。
GET	`/api/pipeline/jobs/{id}`	查询指定 Pipeline 任务状态、阶段、日志和产物路径。
POST	`/api/score`	Dify 实时评分接口，接收单条问答并返回各指标得分与综合得分。
POST	`/api/llm-profiles/probe`	临时测试 LLM / Embedding 连通性，无需先保存配置。
POST	`/api/llm-profiles`	创建命名的 LLM 配置档案，可供场景或控制台复用。
POST	`/api/evaluations`	基于已有场景 YAML 启动一次后台评估任务。
GET	`/api/runs`	获取历史运行列表，用于控制台报告页与明细页渲染。
变量	用途	示例 / 默认
`OPENAI_API_KEY`	OpenAI 兼容接口凭据	`your-api-key`
`OPENAI_BASE_URL`	统一 LLM / Embedding 网关地址	`http://6.86.80.4:30080/v1`
`RAGAS_JUDGE_MODEL`	RAGAS Judge 默认模型	`gpt-5`
`RAGAS_EMBEDDING_MODEL`	Embedding 默认模型	`text-embedding-3-small`
`ALIBABA_ACCESS_KEY_ID` / `ALIBABA_ACCESS_KEY_SECRET`	阿里云 DocMind 凭据	dataset build 必填
`ALIBABA_ENDPOINT`	DocMind 服务域名	`docmind-api.cn-hangzhou.aliyuncs.com`
`DATASET_GENERATOR_MODEL`	题库生成默认模型	`qwen3.6-plus`
`SCORE_API_TOKEN`	`/api/score` Bearer 鉴权令牌	留空则不鉴权
`RAGAS_METRIC_TIMEOUT_SECONDS`	RAGAS 指标计算超时	`300`（7指标建议值）