feat(chunk): add chunk selection and highlighting in PDF viewer
This commit is contained in:
@@ -6,18 +6,28 @@ import tseslint from 'typescript-eslint'
|
||||
import { defineConfig, globalIgnores } from 'eslint/config'
|
||||
|
||||
export default defineConfig([
|
||||
globalIgnores(['dist', 'rag_web_core']),
|
||||
globalIgnores(['dist', 'ragflow_web']),
|
||||
{
|
||||
files: ['**/*.{ts,tsx}'],
|
||||
rules: {},
|
||||
extends: [
|
||||
js.configs.recommended,
|
||||
tseslint.configs.recommended,
|
||||
// tseslint.configs.recommended,
|
||||
reactHooks.configs['recommended-latest'],
|
||||
reactRefresh.configs.vite,
|
||||
],
|
||||
languageOptions: {
|
||||
ecmaVersion: 2020,
|
||||
globals: globals.browser,
|
||||
parser: tseslint.parser,
|
||||
parserOptions: {
|
||||
ecmaFeatures: { jsx: true },
|
||||
},
|
||||
},
|
||||
plugins: {
|
||||
'@typescript-eslint': tseslint.plugin,
|
||||
'react-hooks': reactHooks,
|
||||
'react-refresh': reactRefresh,
|
||||
},
|
||||
},
|
||||
])
|
||||
|
||||
@@ -60,10 +60,12 @@ interface ChunkListResultProps {
|
||||
onRefresh?: () => void;
|
||||
docName?: string;
|
||||
onLocate?: (chunk: IChunk) => void;
|
||||
selectedChunkId?: string;
|
||||
onSelect?: (chunk: IChunk) => void;
|
||||
}
|
||||
|
||||
function ChunkListResult(props: ChunkListResultProps) {
|
||||
const { doc_id, chunks, total, loading, page, pageSize, onPageChange, onRefresh, onLocate } = props;
|
||||
const { doc_id, chunks, total, loading, page, pageSize, onPageChange, onRefresh, onLocate, selectedChunkId, onSelect } = props;
|
||||
const { t } = useTranslation();
|
||||
|
||||
// 选择状态
|
||||
@@ -363,14 +365,13 @@ function ChunkListResult(props: ChunkListResultProps) {
|
||||
'&:hover': {
|
||||
boxShadow: 2,
|
||||
},
|
||||
border: selectedChunks.includes(chunk.chunk_id) ? '2px solid' : '1px solid',
|
||||
borderColor: selectedChunks.includes(chunk.chunk_id)
|
||||
border: (selectedChunks.includes(chunk.chunk_id) || selectedChunkId === chunk.chunk_id) ? '2px solid' : '1px solid',
|
||||
borderColor: (selectedChunks.includes(chunk.chunk_id) || selectedChunkId === chunk.chunk_id)
|
||||
? 'primary.main'
|
||||
: chunk.available_int === 1
|
||||
? 'success.light'
|
||||
: 'grey.300',
|
||||
backgroundColor: selectedChunks.includes(chunk.chunk_id) ? 'action.selected' : 'background.paper',
|
||||
: (chunk.available_int === 1 ? 'success.light' : 'grey.300'),
|
||||
backgroundColor: (selectedChunks.includes(chunk.chunk_id) || selectedChunkId === chunk.chunk_id) ? 'action.selected' : 'background.paper',
|
||||
}}
|
||||
onClick={() => onSelect?.(chunk)}
|
||||
>
|
||||
<CardContent sx={{ p: 2 }}>
|
||||
{/* 头部操作区域 */}
|
||||
@@ -389,7 +390,7 @@ function ChunkListResult(props: ChunkListResultProps) {
|
||||
/>
|
||||
{/* 定位到文档位置 */}
|
||||
<Tooltip title={'定位'}>
|
||||
<IconButton size="small" onClick={() => onLocate?.(chunk)}>
|
||||
<IconButton size="small" onClick={() => { onSelect?.(chunk); onLocate?.(chunk); }}>
|
||||
<ZoomInIcon />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
@@ -425,7 +426,7 @@ function ChunkListResult(props: ChunkListResultProps) {
|
||||
}
|
||||
}
|
||||
}}
|
||||
onClick={() => handleImageClick(`${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`, chunk)}
|
||||
onClick={() => { onSelect?.(chunk); handleImageClick(`${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`, chunk); }}
|
||||
onMouseEnter={(e) => handleImageHover(e, `${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`)}
|
||||
onMouseLeave={handleImageHoverClose}
|
||||
>
|
||||
|
||||
@@ -37,17 +37,7 @@ function ChunkParsedResult() {
|
||||
const kb_id = searchParams.get('kb_id');
|
||||
const doc_id = searchParams.get('doc_id');
|
||||
|
||||
const [knowledgeBase, setKnowledgeBase] = useState<IKnowledge | null>(null);
|
||||
const [document, setDocument] = useState<IKnowledgeFile | null>(null);
|
||||
const [searchKeyword, setSearchKeyword] = useState('');
|
||||
const [documentFile, setDocumentFile] = useState<Blob | null>(null);
|
||||
const [fileUrl, setFileUrl] = useState<string>('');
|
||||
const [fileLoading, setFileLoading] = useState(false);
|
||||
const [previewOverrideUrl, setPreviewOverrideUrl] = useState<string>('');
|
||||
const [focusPage, setFocusPage] = useState<number | null>(null);
|
||||
const abortControllerRef = useRef<AbortController | null>(null);
|
||||
const pdfContainerRef = useRef<HTMLDivElement | null>(null);
|
||||
const [pdfRendered, setPdfRendered] = useState<boolean>(false);
|
||||
|
||||
// 使用chunk列表hook
|
||||
const {
|
||||
@@ -66,6 +56,19 @@ function ChunkParsedResult() {
|
||||
keywords: searchKeyword
|
||||
});
|
||||
|
||||
const [knowledgeBase, setKnowledgeBase] = useState<IKnowledge | null>(null);
|
||||
const [document, setDocument] = useState<IKnowledgeFile | null>(null);
|
||||
const [documentFile, setDocumentFile] = useState<Blob | null>(null);
|
||||
const [fileUrl, setFileUrl] = useState<string>('');
|
||||
const [fileLoading, setFileLoading] = useState(false);
|
||||
const [previewOverrideUrl, setPreviewOverrideUrl] = useState<string>('');
|
||||
const [focusPage, setFocusPage] = useState<number | null>(null);
|
||||
const abortControllerRef = useRef<AbortController | null>(null);
|
||||
const pdfContainerRef = useRef<HTMLDivElement | null>(null);
|
||||
const [pdfRendered, setPdfRendered] = useState<boolean>(false);
|
||||
const [selectedChunkId, setSelectedChunkId] = useState<string>('');
|
||||
const selectedChunk = chunks.find((c) => c.chunk_id === selectedChunkId) || null;
|
||||
|
||||
// 获取知识库和文档信息
|
||||
useEffect(() => {
|
||||
const fetchData = async () => {
|
||||
@@ -204,6 +207,10 @@ function ChunkParsedResult() {
|
||||
|
||||
const pageWrapper = window.document.createElement('div');
|
||||
pageWrapper.setAttribute('data-page-index', String(pageNum));
|
||||
pageWrapper.setAttribute('data-scale', String(scale));
|
||||
pageWrapper.setAttribute('data-viewport-width', String(viewport.width));
|
||||
pageWrapper.setAttribute('data-viewport-height', String(viewport.height));
|
||||
pageWrapper.style.position = 'relative';
|
||||
pageWrapper.appendChild(canvas);
|
||||
container.appendChild(pageWrapper);
|
||||
}
|
||||
@@ -278,6 +285,7 @@ function ChunkParsedResult() {
|
||||
if (documentFile?.type === 'application/pdf') {
|
||||
setFocusPage(page && !Number.isNaN(page) ? page : null);
|
||||
setPreviewOverrideUrl('');
|
||||
setSelectedChunkId(chunk.chunk_id || '');
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -286,6 +294,107 @@ function ChunkParsedResult() {
|
||||
setFocusPage(null);
|
||||
};
|
||||
|
||||
// Draws highlight rectangles over the rendered PDF pages for the currently
// selected chunk. Runs whenever the selected chunk, the rendered flag, or the
// loaded document changes. Highlights from the previous selection are removed
// first, so at most one chunk is highlighted at a time.
useEffect(() => {
  if (documentFile?.type !== 'application/pdf' || !pdfRendered) return;
  const container = pdfContainerRef.current;
  if (!container) return;

  // Drop the layers left over from the previous selection.
  for (const stale of Array.from(container.querySelectorAll('.pdf-highlight-layer'))) {
    stale.remove();
  }

  // `positions` is treated as untrusted data and narrowed at runtime.
  const rawPositions: unknown = selectedChunk?.positions;
  if (!Array.isArray(rawPositions) || rawPositions.length === 0) return;
  const entries: unknown[] = rawPositions;

  type Rect = { x1: number; x2: number; y1: number; y2: number };

  // 1) Group the rectangles by page. Each entry is read as
  //    [page, x1, x2, y1, y2]; anything shorter or non-array is ignored.
  const rectsByPage = new Map<number, Rect[]>();
  for (const entry of entries) {
    if (!Array.isArray(entry) || entry.length < 5) continue;
    const page = Number(entry[0]);
    const x1 = Number(entry[1]);
    const x2 = Number(entry[2]);
    const y1 = Number(entry[3]);
    const y2 = Number(entry[4]);
    // A non-numeric value would turn into `NaNpx` styles below and leave a
    // stray, unpositioned box on the page, so such entries are skipped.
    if (![page, x1, x2, y1, y2].every(Number.isFinite)) continue;
    const pageRects = rectsByPage.get(page) ?? [];
    pageRects.push({ x1, x2, y1, y2 });
    rectsByPage.set(page, pageRects);
  }

  // 2) Tolerances used to merge the lines of one paragraph into one box.
  //    Both are compared against the raw position values, i.e. before the
  //    page scale is applied.
  const XTOL = 2; // tolerance on the horizontal extent (x1 / x2)
  const GAP_TOL = 8; // maximum vertical gap between two lines that still merge
  const quant = (v: number) => Math.round(v / XTOL) * XTOL;

  // 3) Per page: bucket by quantized x1/x2, then merge vertically adjacent
  //    rectangles inside each bucket and draw the result.
  rectsByPage.forEach((pageRects, pageNumber) => {
    const pageWrapper = container.querySelector<HTMLElement>(`[data-page-index="${pageNumber}"]`);
    if (!pageWrapper) return;
    const canvas = pageWrapper.querySelector('canvas');
    if (!canvas) return;

    // NOTE(review): only `data-scale` is used to map positions onto the page.
    // The wrapper's `data-viewport-width` / `data-viewport-height` attributes
    // are not read here; if the canvas is ever displayed at a CSS size that
    // differs from its viewport size the highlights will drift - confirm.
    const scale = Number(pageWrapper.getAttribute('data-scale') || '1');

    // Rectangles sharing (approximately) the same horizontal extent are
    // assumed to belong to the same column / paragraph.
    const buckets = new Map<string, Rect[]>();
    for (const rect of pageRects) {
      const key = `${quant(rect.x1)}-${quant(rect.x2)}`;
      const bucket = buckets.get(key) ?? [];
      bucket.push(rect);
      buckets.set(key, bucket);
    }

    // One overlay per page, sized to the canvas as currently laid out and
    // transparent to pointer events so the page underneath stays interactive.
    const layer = window.document.createElement('div');
    layer.className = 'pdf-highlight-layer';
    Object.assign(layer.style, {
      position: 'absolute',
      left: '0px',
      top: '0px',
      width: `${canvas.clientWidth}px`,
      height: `${canvas.clientHeight}px`,
      pointerEvents: 'none',
    });

    buckets.forEach((bucket) => {
      // Sort a copy top-to-bottom so the shared bucket array is not mutated.
      const ordered = [...bucket].sort((a, b) => a.y1 - b.y1);

      const merged: Rect[] = [];
      for (const rect of ordered) {
        const last = merged[merged.length - 1];
        const sameExtent =
          last !== undefined &&
          Math.abs(rect.x1 - last.x1) <= XTOL &&
          Math.abs(rect.x2 - last.x2) <= XTOL;
        // The next line starts no more than GAP_TOL below the bottom edge of
        // the box merged so far.
        const contiguous = last !== undefined && rect.y1 <= last.y2 + GAP_TOL;
        if (last !== undefined && sameExtent && contiguous) {
          last.y2 = Math.max(last.y2, rect.y2);
        } else {
          merged.push({ ...rect });
        }
      }

      for (const box of merged) {
        const marker = window.document.createElement('div');
        Object.assign(marker.style, {
          position: 'absolute',
          left: `${box.x1 * scale}px`,
          top: `${box.y1 * scale}px`,
          // Clamp so inverted coordinates never produce a negative size.
          width: `${Math.max(0, (box.x2 - box.x1) * scale)}px`,
          height: `${Math.max(0, (box.y2 - box.y1) * scale)}px`,
          background: 'rgba(255, 230, 0, 0.30)',
          border: '1px solid rgba(255, 193, 7, 0.75)',
          borderRadius: '2px',
          pointerEvents: 'none',
        });
        layer.appendChild(marker);
      }
    });

    pageWrapper.appendChild(layer);
  });
}, [selectedChunk, pdfRendered, documentFile]);
|
||||
|
||||
// 渲染左侧预览
|
||||
const renderPreview = () => {
|
||||
// 如果有覆盖的图片URL,直接显示图片
|
||||
@@ -442,6 +551,11 @@ function ChunkParsedResult() {
|
||||
onRefresh={refresh}
|
||||
docName={document?.name}
|
||||
onLocate={handleLocate}
|
||||
selectedChunkId={selectedChunkId}
|
||||
onSelect={(chunk) => {
|
||||
setSelectedChunkId(chunk.chunk_id || '');
|
||||
handleLocate(chunk);
|
||||
}}
|
||||
/>
|
||||
</Box>
|
||||
</Paper>
|
||||
|
||||
@@ -7,6 +7,6 @@
|
||||
],
|
||||
// exclude rag_web_core/**/*
|
||||
"exclude": [
|
||||
"rag_web_core/**"
|
||||
"ragflow_web/**"
|
||||
]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user