From fa42853d4133dcea7ded3deee038983f4e3e52ae Mon Sep 17 00:00:00 2001 From: "guangfei.zhao" Date: Tue, 25 Nov 2025 14:46:25 +0800 Subject: [PATCH] feat(chunk): add chunk selection and highlighting in PDF viewer --- eslint.config.js | 14 +- .../chunk/components/ChunkListResult.tsx | 23 +-- src/pages/chunk/parsed-result.tsx | 136 ++++++++++++++++-- tsconfig.json | 2 +- 4 files changed, 150 insertions(+), 25 deletions(-) diff --git a/eslint.config.js b/eslint.config.js index 78b365a..dbad180 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -6,18 +6,28 @@ import tseslint from 'typescript-eslint' import { defineConfig, globalIgnores } from 'eslint/config' export default defineConfig([ - globalIgnores(['dist', 'rag_web_core']), + globalIgnores(['dist', 'ragflow_web']), { files: ['**/*.{ts,tsx}'], + rules: {}, extends: [ js.configs.recommended, - tseslint.configs.recommended, + // tseslint.configs.recommended, reactHooks.configs['recommended-latest'], reactRefresh.configs.vite, ], languageOptions: { ecmaVersion: 2020, globals: globals.browser, + parser: tseslint.parser, + parserOptions: { + ecmaFeatures: { jsx: true }, + }, + }, + plugins: { + '@typescript-eslint': tseslint.plugin, + 'react-hooks': reactHooks, + 'react-refresh': reactRefresh, }, }, ]) diff --git a/src/pages/chunk/components/ChunkListResult.tsx b/src/pages/chunk/components/ChunkListResult.tsx index 4ee281d..549026d 100644 --- a/src/pages/chunk/components/ChunkListResult.tsx +++ b/src/pages/chunk/components/ChunkListResult.tsx @@ -60,10 +60,12 @@ interface ChunkListResultProps { onRefresh?: () => void; docName?: string; onLocate?: (chunk: IChunk) => void; + selectedChunkId?: string; + onSelect?: (chunk: IChunk) => void; } function ChunkListResult(props: ChunkListResultProps) { - const { doc_id, chunks, total, loading, page, pageSize, onPageChange, onRefresh, onLocate } = props; + const { doc_id, chunks, total, loading, page, pageSize, onPageChange, onRefresh, onLocate, selectedChunkId, onSelect } = props; const { t } = useTranslation(); // 选择状态 @@ -363,14 +365,13 @@ function ChunkListResult(props: ChunkListResultProps) { '&:hover': { boxShadow: 2, }, - border: selectedChunks.includes(chunk.chunk_id) ? '2px solid' : '1px solid', - borderColor: selectedChunks.includes(chunk.chunk_id) - ? 'primary.main' - : chunk.available_int === 1 - ? 'success.light' - : 'grey.300', - backgroundColor: selectedChunks.includes(chunk.chunk_id) ? 'action.selected' : 'background.paper', + border: (selectedChunks.includes(chunk.chunk_id) || selectedChunkId === chunk.chunk_id) ? '2px solid' : '1px solid', + borderColor: (selectedChunks.includes(chunk.chunk_id) || selectedChunkId === chunk.chunk_id) + ? 'primary.main' + : (chunk.available_int === 1 ? 'success.light' : 'grey.300'), + backgroundColor: (selectedChunks.includes(chunk.chunk_id) || selectedChunkId === chunk.chunk_id) ? 'action.selected' : 'background.paper', }} + onClick={() => onSelect?.(chunk)} > {/* 头部操作区域 */} @@ -389,7 +390,7 @@ function ChunkListResult(props: ChunkListResultProps) { /> {/* 定位到文档位置 */} - onLocate?.(chunk)}> + { onSelect?.(chunk); onLocate?.(chunk); }}> @@ -425,7 +426,7 @@ function ChunkListResult(props: ChunkListResultProps) { } } }} - onClick={() => handleImageClick(`${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`, chunk)} + onClick={() => { onSelect?.(chunk); handleImageClick(`${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`, chunk); }} onMouseEnter={(e) => handleImageHover(e, `${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`)} onMouseLeave={handleImageHoverClose} > @@ -752,4 +753,4 @@ function ChunkListResult(props: ChunkListResultProps) { ); } -export default ChunkListResult; \ No newline at end of file +export default ChunkListResult; diff --git a/src/pages/chunk/parsed-result.tsx b/src/pages/chunk/parsed-result.tsx index 52dfbfa..e9cdae2 100644 --- a/src/pages/chunk/parsed-result.tsx +++ b/src/pages/chunk/parsed-result.tsx @@ -37,17 +37,7 @@ function ChunkParsedResult() { const kb_id = searchParams.get('kb_id'); const doc_id = searchParams.get('doc_id'); - const [knowledgeBase, setKnowledgeBase] = useState(null); - const [document, setDocument] = useState(null); const [searchKeyword, setSearchKeyword] = useState(''); - const [documentFile, setDocumentFile] = useState(null); - const [fileUrl, setFileUrl] = useState(''); - const [fileLoading, setFileLoading] = useState(false); - const [previewOverrideUrl, setPreviewOverrideUrl] = useState(''); - const [focusPage, setFocusPage] = useState(null); - const abortControllerRef = useRef(null); - const pdfContainerRef = useRef(null); - const [pdfRendered, setPdfRendered] = useState(false); // 使用chunk列表hook const { @@ -66,6 +56,19 @@ function ChunkParsedResult() { keywords: searchKeyword }); + const [knowledgeBase, setKnowledgeBase] = useState(null); + const [document, setDocument] = useState(null); + const [documentFile, setDocumentFile] = useState(null); + const [fileUrl, setFileUrl] = useState(''); + const [fileLoading, setFileLoading] = useState(false); + const [previewOverrideUrl, setPreviewOverrideUrl] = useState(''); + const [focusPage, setFocusPage] = useState(null); + const abortControllerRef = useRef(null); + const pdfContainerRef = useRef(null); + const [pdfRendered, setPdfRendered] = useState(false); + const [selectedChunkId, setSelectedChunkId] = useState(''); + const selectedChunk = chunks.find((c) => c.chunk_id === selectedChunkId) || null; + // 获取知识库和文档信息 useEffect(() => { const fetchData = async () => { @@ -204,6 +207,10 @@ function ChunkParsedResult() { const pageWrapper = window.document.createElement('div'); pageWrapper.setAttribute('data-page-index', String(pageNum)); + pageWrapper.setAttribute('data-scale', String(scale)); + pageWrapper.setAttribute('data-viewport-width', String(viewport.width)); + pageWrapper.setAttribute('data-viewport-height', String(viewport.height)); + pageWrapper.style.position = 'relative'; pageWrapper.appendChild(canvas); container.appendChild(pageWrapper); } @@ -278,6 +285,7 @@ function ChunkParsedResult() { if (documentFile?.type === 'application/pdf') { setFocusPage(page && !Number.isNaN(page) ? page : null); setPreviewOverrideUrl(''); + setSelectedChunkId(chunk.chunk_id || ''); return; } @@ -286,6 +294,107 @@ function ChunkParsedResult() { setFocusPage(null); }; + useEffect(() => { + if (documentFile?.type !== 'application/pdf' || !pdfRendered) return; + const container = pdfContainerRef.current; + if (!container) return; + + Array.from(container.querySelectorAll('.pdf-highlight-layer')).forEach((el) => el.remove()); + + const chunk = selectedChunk; + const positions: any[] = (chunk?.positions || []) as any[]; + if (!Array.isArray(positions) || positions.length === 0) return; + + // 1) 将位置按页分组 + const pageMap = new Map>(); + positions.forEach((pos) => { + if (!Array.isArray(pos) || pos.length < 5) return; + const p = Number(pos[0]); + const x1 = Number(pos[1]); + const x2 = Number(pos[2]); + const y1 = Number(pos[3]); + const y2 = Number(pos[4]); + const list = pageMap.get(p) || []; + list.push({ x1, x2, y1, y2 }); + pageMap.set(p, list); + }); + + // 2) 设置容差与行距阈值:用于合并同一段落的多行 + const XTOL = 2; // x范围容差,像素 + const GAP_TOL = 8; // 行间距阈值,像素 + const quant = (v: number) => Math.round(v / XTOL) * XTOL; + + // 3) 遍历每页:按量化后的 x1/x2 分桶,再按 y1 合并相邻行 + pageMap.forEach((segList, pageNumber) => { + const pageWrapper = container.querySelector(`[data-page-index="${pageNumber}"]`) as HTMLElement | null; + if (!pageWrapper) return; + const scale = Number(pageWrapper.getAttribute('data-scale') || '1'); + const canvas = pageWrapper.querySelector('canvas') as HTMLCanvasElement | null; + if (!canvas) return; + + const cssWidth = canvas.clientWidth; + const cssHeight = canvas.clientHeight; + + // 分桶:相同(或近似)列宽的文本认为是一段 + const buckets = new Map>(); + segList.forEach(s => { + const key = `${quant(s.x1)}-${quant(s.x2)}`; + const arr = buckets.get(key) || []; + arr.push(s); + buckets.set(key, arr); + }); + + const layer = window.document.createElement('div'); + layer.className = 'pdf-highlight-layer'; + layer.style.position = 'absolute'; + layer.style.left = '0px'; + layer.style.top = '0px'; + layer.style.width = `${cssWidth}px`; + layer.style.height = `${cssHeight}px`; + layer.style.pointerEvents = 'none'; + + buckets.forEach((bucketSegs) => { + const segs = bucketSegs.slice().sort((a,b) => a.y1 - b.y1); + const merged: Array<{x1:number;x2:number;y1:number;y2:number}> = []; + segs.forEach(seg => { + const last = merged[merged.length - 1]; + if (!last) { + merged.push({ ...seg }); + return; + } + const similarWidth = Math.abs(seg.x1 - last.x1) <= XTOL && Math.abs(seg.x2 - last.x2) <= XTOL; + const contiguous = seg.y1 <= last.y2 + GAP_TOL; // 上一行的下边缘到当前行的上边缘间隔很小 + if (similarWidth && contiguous) { + last.y2 = Math.max(last.y2, seg.y2); + } else { + merged.push({ ...seg }); + } + }); + + merged.forEach(m => { + const left = m.x1 * scale; + const width = (m.x2 - m.x1) * scale; + const top = m.y1 * scale; + const height = (m.y2 - m.y1) * scale; + + const rect = window.document.createElement('div'); + rect.style.position = 'absolute'; + rect.style.left = `${left}px`; + rect.style.top = `${top}px`; + rect.style.width = `${Math.max(0, width)}px`; + rect.style.height = `${Math.max(0, height)}px`; + rect.style.background = 'rgba(255, 230, 0, 0.30)'; + rect.style.border = '1px solid rgba(255, 193, 7, 0.75)'; + rect.style.borderRadius = '2px'; + rect.style.pointerEvents = 'none'; + layer.appendChild(rect); + }); + }); + + pageWrapper.appendChild(layer); + }); + }, [selectedChunk, pdfRendered, documentFile]); + // 渲染左侧预览 const renderPreview = () => { // 如果有覆盖的图片URL,直接显示图片 @@ -442,6 +551,11 @@ function ChunkParsedResult() { onRefresh={refresh} docName={document?.name} onLocate={handleLocate} + selectedChunkId={selectedChunkId} + onSelect={(chunk) => { + setSelectedChunkId(chunk.chunk_id || ''); + handleLocate(chunk); + }} /> @@ -451,4 +565,4 @@ function ChunkParsedResult() { ); } -export default ChunkParsedResult; \ No newline at end of file +export default ChunkParsedResult; diff --git a/tsconfig.json b/tsconfig.json index 1574bc0..92f1a47 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -7,6 +7,6 @@ ], // exclude rag_web_core/**/* "exclude": [ - "rag_web_core/**" + "ragflow_web/**" ] }