feat(chunk): add chunk selection and highlighting in PDF viewer

This commit is contained in:
2025-11-25 14:46:25 +08:00
parent 70926de445
commit fa42853d41
4 changed files with 150 additions and 25 deletions

View File

@@ -6,18 +6,28 @@ import tseslint from 'typescript-eslint'
import { defineConfig, globalIgnores } from 'eslint/config'
export default defineConfig([
globalIgnores(['dist', 'rag_web_core']),
globalIgnores(['dist', 'ragflow_web']),
{
files: ['**/*.{ts,tsx}'],
rules: {},
extends: [
js.configs.recommended,
tseslint.configs.recommended,
// tseslint.configs.recommended,
reactHooks.configs['recommended-latest'],
reactRefresh.configs.vite,
],
languageOptions: {
ecmaVersion: 2020,
globals: globals.browser,
parser: tseslint.parser,
parserOptions: {
ecmaFeatures: { jsx: true },
},
},
plugins: {
'@typescript-eslint': tseslint.plugin,
'react-hooks': reactHooks,
'react-refresh': reactRefresh,
},
},
])

View File

@@ -60,10 +60,12 @@ interface ChunkListResultProps {
onRefresh?: () => void;
docName?: string;
onLocate?: (chunk: IChunk) => void;
selectedChunkId?: string;
onSelect?: (chunk: IChunk) => void;
}
function ChunkListResult(props: ChunkListResultProps) {
const { doc_id, chunks, total, loading, page, pageSize, onPageChange, onRefresh, onLocate } = props;
const { doc_id, chunks, total, loading, page, pageSize, onPageChange, onRefresh, onLocate, selectedChunkId, onSelect } = props;
const { t } = useTranslation();
// 选择状态
@@ -363,14 +365,13 @@ function ChunkListResult(props: ChunkListResultProps) {
'&:hover': {
boxShadow: 2,
},
border: selectedChunks.includes(chunk.chunk_id) ? '2px solid' : '1px solid',
borderColor: selectedChunks.includes(chunk.chunk_id)
? 'primary.main'
: chunk.available_int === 1
? 'success.light'
: 'grey.300',
backgroundColor: selectedChunks.includes(chunk.chunk_id) ? 'action.selected' : 'background.paper',
border: (selectedChunks.includes(chunk.chunk_id) || selectedChunkId === chunk.chunk_id) ? '2px solid' : '1px solid',
borderColor: (selectedChunks.includes(chunk.chunk_id) || selectedChunkId === chunk.chunk_id)
? 'primary.main'
: (chunk.available_int === 1 ? 'success.light' : 'grey.300'),
backgroundColor: (selectedChunks.includes(chunk.chunk_id) || selectedChunkId === chunk.chunk_id) ? 'action.selected' : 'background.paper',
}}
onClick={() => onSelect?.(chunk)}
>
<CardContent sx={{ p: 2 }}>
{/* 头部操作区域 */}
@@ -389,7 +390,7 @@ function ChunkListResult(props: ChunkListResultProps) {
/>
{/* 定位到文档位置 */}
<Tooltip title={'定位'}>
<IconButton size="small" onClick={() => onLocate?.(chunk)}>
<IconButton size="small" onClick={() => { onSelect?.(chunk); onLocate?.(chunk); }}>
<ZoomInIcon />
</IconButton>
</Tooltip>
@@ -425,7 +426,7 @@ function ChunkListResult(props: ChunkListResultProps) {
}
}
}}
onClick={() => handleImageClick(`${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`, chunk)}
onClick={() => { onSelect?.(chunk); handleImageClick(`${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`, chunk); }}
onMouseEnter={(e) => handleImageHover(e, `${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`)}
onMouseLeave={handleImageHoverClose}
>
@@ -752,4 +753,4 @@ function ChunkListResult(props: ChunkListResultProps) {
);
}
export default ChunkListResult;
export default ChunkListResult;

View File

@@ -37,17 +37,7 @@ function ChunkParsedResult() {
const kb_id = searchParams.get('kb_id');
const doc_id = searchParams.get('doc_id');
const [knowledgeBase, setKnowledgeBase] = useState<IKnowledge | null>(null);
const [document, setDocument] = useState<IKnowledgeFile | null>(null);
const [searchKeyword, setSearchKeyword] = useState('');
const [documentFile, setDocumentFile] = useState<Blob | null>(null);
const [fileUrl, setFileUrl] = useState<string>('');
const [fileLoading, setFileLoading] = useState(false);
const [previewOverrideUrl, setPreviewOverrideUrl] = useState<string>('');
const [focusPage, setFocusPage] = useState<number | null>(null);
const abortControllerRef = useRef<AbortController | null>(null);
const pdfContainerRef = useRef<HTMLDivElement | null>(null);
const [pdfRendered, setPdfRendered] = useState<boolean>(false);
// 使用chunk列表hook
const {
@@ -66,6 +56,19 @@ function ChunkParsedResult() {
keywords: searchKeyword
});
const [knowledgeBase, setKnowledgeBase] = useState<IKnowledge | null>(null);
const [document, setDocument] = useState<IKnowledgeFile | null>(null);
const [documentFile, setDocumentFile] = useState<Blob | null>(null);
const [fileUrl, setFileUrl] = useState<string>('');
const [fileLoading, setFileLoading] = useState(false);
const [previewOverrideUrl, setPreviewOverrideUrl] = useState<string>('');
const [focusPage, setFocusPage] = useState<number | null>(null);
const abortControllerRef = useRef<AbortController | null>(null);
const pdfContainerRef = useRef<HTMLDivElement | null>(null);
const [pdfRendered, setPdfRendered] = useState<boolean>(false);
const [selectedChunkId, setSelectedChunkId] = useState<string>('');
const selectedChunk = chunks.find((c) => c.chunk_id === selectedChunkId) || null;
// 获取知识库和文档信息
useEffect(() => {
const fetchData = async () => {
@@ -204,6 +207,10 @@ function ChunkParsedResult() {
const pageWrapper = window.document.createElement('div');
pageWrapper.setAttribute('data-page-index', String(pageNum));
pageWrapper.setAttribute('data-scale', String(scale));
pageWrapper.setAttribute('data-viewport-width', String(viewport.width));
pageWrapper.setAttribute('data-viewport-height', String(viewport.height));
pageWrapper.style.position = 'relative';
pageWrapper.appendChild(canvas);
container.appendChild(pageWrapper);
}
@@ -278,6 +285,7 @@ function ChunkParsedResult() {
if (documentFile?.type === 'application/pdf') {
setFocusPage(page && !Number.isNaN(page) ? page : null);
setPreviewOverrideUrl('');
setSelectedChunkId(chunk.chunk_id || '');
return;
}
@@ -286,6 +294,107 @@ function ChunkParsedResult() {
setFocusPage(null);
};
// Overlay highlight rectangles on the rendered PDF pages for the currently selected chunk.
//
// Runs whenever the selected chunk, the "PDF rendered" flag, or the loaded file changes.
// Each run first removes every previously inserted `.pdf-highlight-layer`, then (if the
// chunk carries positions) appends one absolutely positioned layer per affected page
// wrapper, containing one rectangle per merged text region.
//
// Each position entry is read as [page, x1, x2, y1, y2]; the page value is matched against
// the wrapper's `data-page-index` attribute and the coordinates are multiplied by the
// wrapper's `data-scale` attribute.
// NOTE(review): the coordinate units of `positions` are not visible here — the tolerance
// constants below are described as pixels by the original author; confirm against the API.
useEffect(() => {
  if (documentFile?.type !== 'application/pdf' || !pdfRendered) return;
  const container = pdfContainerRef.current;
  if (!container) return;

  // Always clear stale overlays first so deselecting a chunk also removes its highlight.
  Array.from(container.querySelectorAll('.pdf-highlight-layer')).forEach((el) => el.remove());

  // Treat positions as untrusted data and narrow at runtime instead of casting to any[].
  const rawPositions: unknown = selectedChunk?.positions;
  if (!Array.isArray(rawPositions) || rawPositions.length === 0) return;

  type Rect = { x1: number; x2: number; y1: number; y2: number };

  // 1) Group positions by page.
  const pageMap = new Map<number, Rect[]>();
  rawPositions.forEach((pos: unknown) => {
    if (!Array.isArray(pos) || pos.length < 5) return;
    const p = Number(pos[0]);
    const x1 = Number(pos[1]);
    const x2 = Number(pos[2]);
    const y1 = Number(pos[3]);
    const y2 = Number(pos[4]);
    // Skip malformed entries: NaN/Infinity would otherwise produce invalid `NaNpx` styles
    // and a rectangle rendered at an arbitrary place.
    if (![p, x1, x2, y1, y2].every((v) => Number.isFinite(v))) return;
    const list = pageMap.get(p) || [];
    list.push({ x1, x2, y1, y2 });
    pageMap.set(p, list);
  });

  // 2) Tolerances used to merge the lines of one paragraph into a single rectangle.
  const XTOL = 2; // tolerance on the x range
  const GAP_TOL = 8; // maximum vertical gap between lines that still counts as contiguous
  const quant = (v: number) => Math.round(v / XTOL) * XTOL;

  // Ratio between the canvas's CSS size and the viewport size it was rendered at.
  // Falls back to 1 when either size is unknown, or when they agree to within a pixel
  // (clientWidth/clientHeight are integers, so a sub-pixel difference is only rounding).
  const cssRatio = (cssSize: number, viewportSize: number) =>
    cssSize > 0 && viewportSize > 0 && Math.abs(cssSize - viewportSize) >= 1
      ? cssSize / viewportSize
      : 1;

  // 3) Per page: bucket by quantized x1/x2, then merge vertically adjacent lines by y1.
  pageMap.forEach((segList, pageNumber) => {
    const pageWrapper = container.querySelector(`[data-page-index="${pageNumber}"]`) as HTMLElement | null;
    if (!pageWrapper) return;
    const scale = Number(pageWrapper.getAttribute('data-scale') || '1');
    const canvas = pageWrapper.querySelector('canvas') as HTMLCanvasElement | null;
    if (!canvas) return;
    const cssWidth = canvas.clientWidth;
    const cssHeight = canvas.clientHeight;

    // The layer is sized from the canvas's CSS box, so the rectangles must be expressed in
    // CSS pixels too. If the canvas is displayed at a different size than the viewport it
    // was rendered with, `scale` alone would misplace them; correct with the viewport size
    // recorded on the wrapper (`data-viewport-width` / `data-viewport-height`).
    const ratioX = cssRatio(cssWidth, Number(pageWrapper.getAttribute('data-viewport-width')));
    const ratioY = cssRatio(cssHeight, Number(pageWrapper.getAttribute('data-viewport-height')));

    // Bucketing: segments with the same (or nearly the same) column extent are treated as
    // belonging to one paragraph.
    const buckets = new Map<string, Rect[]>();
    segList.forEach((s) => {
      const key = `${quant(s.x1)}-${quant(s.x2)}`;
      const arr = buckets.get(key) || [];
      arr.push(s);
      buckets.set(key, arr);
    });

    const layer = window.document.createElement('div');
    layer.className = 'pdf-highlight-layer';
    layer.style.position = 'absolute';
    layer.style.left = '0px';
    layer.style.top = '0px';
    layer.style.width = `${cssWidth}px`;
    layer.style.height = `${cssHeight}px`;
    // The overlay must never intercept clicks or scrolling meant for the page.
    layer.style.pointerEvents = 'none';

    buckets.forEach((bucketSegs) => {
      // Sort a copy top-to-bottom so the merge below only has to look at the previous rect.
      const segs = bucketSegs.slice().sort((a, b) => a.y1 - b.y1);
      const merged: Rect[] = [];
      segs.forEach((seg) => {
        const last = merged[merged.length - 1];
        if (!last) {
          merged.push({ ...seg });
          return;
        }
        const similarWidth = Math.abs(seg.x1 - last.x1) <= XTOL && Math.abs(seg.x2 - last.x2) <= XTOL;
        // The gap from the previous rect's bottom edge to this line's top edge is small.
        const contiguous = seg.y1 <= last.y2 + GAP_TOL;
        if (similarWidth && contiguous) {
          // Extend the previous rect downwards; its x extent is kept as-is.
          last.y2 = Math.max(last.y2, seg.y2);
        } else {
          merged.push({ ...seg });
        }
      });

      merged.forEach((m) => {
        const left = m.x1 * scale * ratioX;
        const width = (m.x2 - m.x1) * scale * ratioX;
        const top = m.y1 * scale * ratioY;
        const height = (m.y2 - m.y1) * scale * ratioY;
        const rect = window.document.createElement('div');
        rect.style.position = 'absolute';
        rect.style.left = `${left}px`;
        rect.style.top = `${top}px`;
        // Clamp so inverted coordinates (x2 < x1 or y2 < y1) never yield a negative size.
        rect.style.width = `${Math.max(0, width)}px`;
        rect.style.height = `${Math.max(0, height)}px`;
        rect.style.background = 'rgba(255, 230, 0, 0.30)';
        rect.style.border = '1px solid rgba(255, 193, 7, 0.75)';
        rect.style.borderRadius = '2px';
        rect.style.pointerEvents = 'none';
        layer.appendChild(rect);
      });
    });

    pageWrapper.appendChild(layer);
  });
}, [selectedChunk, pdfRendered, documentFile]);
// 渲染左侧预览
const renderPreview = () => {
// 如果有覆盖的图片URL直接显示图片
@@ -442,6 +551,11 @@ function ChunkParsedResult() {
onRefresh={refresh}
docName={document?.name}
onLocate={handleLocate}
selectedChunkId={selectedChunkId}
onSelect={(chunk) => {
setSelectedChunkId(chunk.chunk_id || '');
handleLocate(chunk);
}}
/>
</Box>
</Paper>
@@ -451,4 +565,4 @@ function ChunkParsedResult() {
);
}
export default ChunkParsedResult;
export default ChunkParsedResult;

View File

@@ -7,6 +7,6 @@
],
// exclude rag_web_core/**/*
"exclude": [
"rag_web_core/**"
"ragflow_web/**"
]
}