feat(chunk): add chunk selection and highlighting in PDF viewer
This commit is contained in:
@@ -6,18 +6,28 @@ import tseslint from 'typescript-eslint'
|
|||||||
import { defineConfig, globalIgnores } from 'eslint/config'
|
import { defineConfig, globalIgnores } from 'eslint/config'
|
||||||
|
|
||||||
export default defineConfig([
|
export default defineConfig([
|
||||||
globalIgnores(['dist', 'rag_web_core']),
|
globalIgnores(['dist', 'ragflow_web']),
|
||||||
{
|
{
|
||||||
files: ['**/*.{ts,tsx}'],
|
files: ['**/*.{ts,tsx}'],
|
||||||
|
rules: {},
|
||||||
extends: [
|
extends: [
|
||||||
js.configs.recommended,
|
js.configs.recommended,
|
||||||
tseslint.configs.recommended,
|
// tseslint.configs.recommended,
|
||||||
reactHooks.configs['recommended-latest'],
|
reactHooks.configs['recommended-latest'],
|
||||||
reactRefresh.configs.vite,
|
reactRefresh.configs.vite,
|
||||||
],
|
],
|
||||||
languageOptions: {
|
languageOptions: {
|
||||||
ecmaVersion: 2020,
|
ecmaVersion: 2020,
|
||||||
globals: globals.browser,
|
globals: globals.browser,
|
||||||
|
parser: tseslint.parser,
|
||||||
|
parserOptions: {
|
||||||
|
ecmaFeatures: { jsx: true },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
plugins: {
|
||||||
|
'@typescript-eslint': tseslint.plugin,
|
||||||
|
'react-hooks': reactHooks,
|
||||||
|
'react-refresh': reactRefresh,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
])
|
])
|
||||||
|
|||||||
@@ -60,10 +60,12 @@ interface ChunkListResultProps {
|
|||||||
onRefresh?: () => void;
|
onRefresh?: () => void;
|
||||||
docName?: string;
|
docName?: string;
|
||||||
onLocate?: (chunk: IChunk) => void;
|
onLocate?: (chunk: IChunk) => void;
|
||||||
|
selectedChunkId?: string;
|
||||||
|
onSelect?: (chunk: IChunk) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
function ChunkListResult(props: ChunkListResultProps) {
|
function ChunkListResult(props: ChunkListResultProps) {
|
||||||
const { doc_id, chunks, total, loading, page, pageSize, onPageChange, onRefresh, onLocate } = props;
|
const { doc_id, chunks, total, loading, page, pageSize, onPageChange, onRefresh, onLocate, selectedChunkId, onSelect } = props;
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
|
|
||||||
// 选择状态
|
// 选择状态
|
||||||
@@ -363,14 +365,13 @@ function ChunkListResult(props: ChunkListResultProps) {
|
|||||||
'&:hover': {
|
'&:hover': {
|
||||||
boxShadow: 2,
|
boxShadow: 2,
|
||||||
},
|
},
|
||||||
border: selectedChunks.includes(chunk.chunk_id) ? '2px solid' : '1px solid',
|
border: (selectedChunks.includes(chunk.chunk_id) || selectedChunkId === chunk.chunk_id) ? '2px solid' : '1px solid',
|
||||||
borderColor: selectedChunks.includes(chunk.chunk_id)
|
borderColor: (selectedChunks.includes(chunk.chunk_id) || selectedChunkId === chunk.chunk_id)
|
||||||
? 'primary.main'
|
? 'primary.main'
|
||||||
: chunk.available_int === 1
|
: (chunk.available_int === 1 ? 'success.light' : 'grey.300'),
|
||||||
? 'success.light'
|
backgroundColor: (selectedChunks.includes(chunk.chunk_id) || selectedChunkId === chunk.chunk_id) ? 'action.selected' : 'background.paper',
|
||||||
: 'grey.300',
|
|
||||||
backgroundColor: selectedChunks.includes(chunk.chunk_id) ? 'action.selected' : 'background.paper',
|
|
||||||
}}
|
}}
|
||||||
|
onClick={() => onSelect?.(chunk)}
|
||||||
>
|
>
|
||||||
<CardContent sx={{ p: 2 }}>
|
<CardContent sx={{ p: 2 }}>
|
||||||
{/* 头部操作区域 */}
|
{/* 头部操作区域 */}
|
||||||
@@ -389,7 +390,7 @@ function ChunkListResult(props: ChunkListResultProps) {
|
|||||||
/>
|
/>
|
||||||
{/* 定位到文档位置 */}
|
{/* 定位到文档位置 */}
|
||||||
<Tooltip title={'定位'}>
|
<Tooltip title={'定位'}>
|
||||||
<IconButton size="small" onClick={() => onLocate?.(chunk)}>
|
<IconButton size="small" onClick={() => { onSelect?.(chunk); onLocate?.(chunk); }}>
|
||||||
<ZoomInIcon />
|
<ZoomInIcon />
|
||||||
</IconButton>
|
</IconButton>
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
@@ -425,7 +426,7 @@ function ChunkListResult(props: ChunkListResultProps) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
onClick={() => handleImageClick(`${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`, chunk)}
|
onClick={() => { onSelect?.(chunk); handleImageClick(`${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`, chunk); }}
|
||||||
onMouseEnter={(e) => handleImageHover(e, `${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`)}
|
onMouseEnter={(e) => handleImageHover(e, `${import.meta.env.VITE_API_BASE_URL}/v1/document/image/${chunk.image_id}`)}
|
||||||
onMouseLeave={handleImageHoverClose}
|
onMouseLeave={handleImageHoverClose}
|
||||||
>
|
>
|
||||||
@@ -752,4 +753,4 @@ function ChunkListResult(props: ChunkListResultProps) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export default ChunkListResult;
|
export default ChunkListResult;
|
||||||
|
|||||||
@@ -37,17 +37,7 @@ function ChunkParsedResult() {
|
|||||||
const kb_id = searchParams.get('kb_id');
|
const kb_id = searchParams.get('kb_id');
|
||||||
const doc_id = searchParams.get('doc_id');
|
const doc_id = searchParams.get('doc_id');
|
||||||
|
|
||||||
const [knowledgeBase, setKnowledgeBase] = useState<IKnowledge | null>(null);
|
|
||||||
const [document, setDocument] = useState<IKnowledgeFile | null>(null);
|
|
||||||
const [searchKeyword, setSearchKeyword] = useState('');
|
const [searchKeyword, setSearchKeyword] = useState('');
|
||||||
const [documentFile, setDocumentFile] = useState<Blob | null>(null);
|
|
||||||
const [fileUrl, setFileUrl] = useState<string>('');
|
|
||||||
const [fileLoading, setFileLoading] = useState(false);
|
|
||||||
const [previewOverrideUrl, setPreviewOverrideUrl] = useState<string>('');
|
|
||||||
const [focusPage, setFocusPage] = useState<number | null>(null);
|
|
||||||
const abortControllerRef = useRef<AbortController | null>(null);
|
|
||||||
const pdfContainerRef = useRef<HTMLDivElement | null>(null);
|
|
||||||
const [pdfRendered, setPdfRendered] = useState<boolean>(false);
|
|
||||||
|
|
||||||
// 使用chunk列表hook
|
// 使用chunk列表hook
|
||||||
const {
|
const {
|
||||||
@@ -66,6 +56,19 @@ function ChunkParsedResult() {
|
|||||||
keywords: searchKeyword
|
keywords: searchKeyword
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const [knowledgeBase, setKnowledgeBase] = useState<IKnowledge | null>(null);
|
||||||
|
const [document, setDocument] = useState<IKnowledgeFile | null>(null);
|
||||||
|
const [documentFile, setDocumentFile] = useState<Blob | null>(null);
|
||||||
|
const [fileUrl, setFileUrl] = useState<string>('');
|
||||||
|
const [fileLoading, setFileLoading] = useState(false);
|
||||||
|
const [previewOverrideUrl, setPreviewOverrideUrl] = useState<string>('');
|
||||||
|
const [focusPage, setFocusPage] = useState<number | null>(null);
|
||||||
|
const abortControllerRef = useRef<AbortController | null>(null);
|
||||||
|
const pdfContainerRef = useRef<HTMLDivElement | null>(null);
|
||||||
|
const [pdfRendered, setPdfRendered] = useState<boolean>(false);
|
||||||
|
const [selectedChunkId, setSelectedChunkId] = useState<string>('');
|
||||||
|
const selectedChunk = chunks.find((c) => c.chunk_id === selectedChunkId) || null;
|
||||||
|
|
||||||
// 获取知识库和文档信息
|
// 获取知识库和文档信息
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const fetchData = async () => {
|
const fetchData = async () => {
|
||||||
@@ -204,6 +207,10 @@ function ChunkParsedResult() {
|
|||||||
|
|
||||||
const pageWrapper = window.document.createElement('div');
|
const pageWrapper = window.document.createElement('div');
|
||||||
pageWrapper.setAttribute('data-page-index', String(pageNum));
|
pageWrapper.setAttribute('data-page-index', String(pageNum));
|
||||||
|
pageWrapper.setAttribute('data-scale', String(scale));
|
||||||
|
pageWrapper.setAttribute('data-viewport-width', String(viewport.width));
|
||||||
|
pageWrapper.setAttribute('data-viewport-height', String(viewport.height));
|
||||||
|
pageWrapper.style.position = 'relative';
|
||||||
pageWrapper.appendChild(canvas);
|
pageWrapper.appendChild(canvas);
|
||||||
container.appendChild(pageWrapper);
|
container.appendChild(pageWrapper);
|
||||||
}
|
}
|
||||||
@@ -278,6 +285,7 @@ function ChunkParsedResult() {
|
|||||||
if (documentFile?.type === 'application/pdf') {
|
if (documentFile?.type === 'application/pdf') {
|
||||||
setFocusPage(page && !Number.isNaN(page) ? page : null);
|
setFocusPage(page && !Number.isNaN(page) ? page : null);
|
||||||
setPreviewOverrideUrl('');
|
setPreviewOverrideUrl('');
|
||||||
|
setSelectedChunkId(chunk.chunk_id || '');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -286,6 +294,107 @@ function ChunkParsedResult() {
|
|||||||
setFocusPage(null);
|
setFocusPage(null);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Highlight the currently selected chunk on top of the rendered PDF pages.
//
// For every page the chunk touches, an absolutely positioned overlay
// (`.pdf-highlight-layer`) is appended to that page's wrapper element and
// filled with one rectangle per merged text region. The effect re-runs when
// the selected chunk, the "PDF rendered" flag, or the loaded file changes, and
// its cleanup removes the overlays so no stale highlight is left behind.
useEffect(() => {
  if (documentFile?.type !== 'application/pdf' || !pdfRendered) return undefined;
  const container = pdfContainerRef.current;
  if (!container) return undefined;

  type Segment = { x1: number; x2: number; y1: number; y2: number };

  // Tolerances used to merge the individual lines of one paragraph.
  const X_TOLERANCE = 2; // allowed difference between the x-ranges of two lines
  const LINE_GAP_TOLERANCE = 8; // max vertical gap between consecutive lines
  const quantize = (value: number): number => Math.round(value / X_TOLERANCE) * X_TOLERANCE;

  const removeHighlightLayers = (): void => {
    container.querySelectorAll('.pdf-highlight-layer').forEach((el) => el.remove());
  };

  // Always start from a clean slate, even if nothing will be drawn this run.
  removeHighlightLayers();

  const rawPositions: unknown = selectedChunk?.positions;
  if (!Array.isArray(rawPositions) || rawPositions.length === 0) return undefined;

  // 1) Group the position tuples by page. Each tuple is read as
  //    [page, x1, x2, y1, y2]; malformed or non-numeric tuples are skipped so
  //    they cannot produce invalid "NaNpx" styles further down.
  const segmentsByPage = new Map<number, Segment[]>();
  for (const pos of rawPositions) {
    if (!Array.isArray(pos) || pos.length < 5) continue;
    const page = Number(pos[0]);
    const segment: Segment = {
      x1: Number(pos[1]),
      x2: Number(pos[2]),
      y1: Number(pos[3]),
      y2: Number(pos[4]),
    };
    const numbers = [page, segment.x1, segment.x2, segment.y1, segment.y2];
    if (!numbers.every((n) => Number.isFinite(n))) continue;

    const pageSegments = segmentsByPage.get(page);
    if (pageSegments) {
      pageSegments.push(segment);
    } else {
      segmentsByPage.set(page, [segment]);
    }
  }

  // 2) For each page: bucket segments by their quantized x-range, merge
  //    vertically adjacent lines inside a bucket, then draw the merged regions.
  segmentsByPage.forEach((pageSegments, pageNumber) => {
    const pageWrapper = container.querySelector<HTMLElement>(`[data-page-index="${pageNumber}"]`);
    if (!pageWrapper) return;
    const canvas = pageWrapper.querySelector('canvas');
    if (!canvas) return;

    // Fall back to 1 when the attribute is missing or not a usable number.
    // NOTE(review): rectangles are positioned using data-scale only; if the
    // canvas is displayed at a CSS size that differs from its render viewport
    // they will be offset — confirm against the page renderer.
    const parsedScale = Number(pageWrapper.getAttribute('data-scale') || '1');
    const scale = Number.isFinite(parsedScale) && parsedScale > 0 ? parsedScale : 1;

    // Lines sharing (approximately) the same column width belong to one paragraph.
    const buckets = new Map<string, Segment[]>();
    for (const seg of pageSegments) {
      const key = `${quantize(seg.x1)}-${quantize(seg.x2)}`;
      const bucket = buckets.get(key);
      if (bucket) {
        bucket.push(seg);
      } else {
        buckets.set(key, [seg]);
      }
    }

    const layer = window.document.createElement('div');
    layer.className = 'pdf-highlight-layer';
    layer.style.position = 'absolute';
    layer.style.left = '0px';
    layer.style.top = '0px';
    layer.style.width = `${canvas.clientWidth}px`;
    layer.style.height = `${canvas.clientHeight}px`;
    layer.style.pointerEvents = 'none';

    buckets.forEach((bucketSegments) => {
      // Sort a copy top-to-bottom so the shared segment list is not mutated.
      const ordered = [...bucketSegments].sort((a, b) => a.y1 - b.y1);
      const merged: Segment[] = [];
      for (const seg of ordered) {
        const last = merged[merged.length - 1];
        const sameColumn =
          last !== undefined &&
          Math.abs(seg.x1 - last.x1) <= X_TOLERANCE &&
          Math.abs(seg.x2 - last.x2) <= X_TOLERANCE;
        // The gap between the previous line's bottom edge and this line's top edge is small.
        const contiguous = last !== undefined && seg.y1 <= last.y2 + LINE_GAP_TOLERANCE;
        if (last !== undefined && sameColumn && contiguous) {
          last.y2 = Math.max(last.y2, seg.y2);
        } else {
          merged.push({ ...seg });
        }
      }

      for (const region of merged) {
        const width = (region.x2 - region.x1) * scale;
        const height = (region.y2 - region.y1) * scale;
        // A degenerate region would only render as a stray bordered dot.
        if (width <= 0 || height <= 0) continue;

        const rect = window.document.createElement('div');
        rect.style.position = 'absolute';
        rect.style.left = `${region.x1 * scale}px`;
        rect.style.top = `${region.y1 * scale}px`;
        rect.style.width = `${width}px`;
        rect.style.height = `${height}px`;
        rect.style.background = 'rgba(255, 230, 0, 0.30)';
        rect.style.border = '1px solid rgba(255, 193, 7, 0.75)';
        rect.style.borderRadius = '2px';
        rect.style.pointerEvents = 'none';
        layer.appendChild(rect);
      }
    });

    // Do not leave empty overlay elements in the DOM.
    if (layer.childElementCount > 0) {
      pageWrapper.appendChild(layer);
    }
  });

  // Remove the overlays before the next run and on unmount.
  return removeHighlightLayers;
}, [selectedChunk, pdfRendered, documentFile]);
|
||||||
|
|
||||||
// 渲染左侧预览
|
// 渲染左侧预览
|
||||||
const renderPreview = () => {
|
const renderPreview = () => {
|
||||||
// 如果有覆盖的图片URL,直接显示图片
|
// 如果有覆盖的图片URL,直接显示图片
|
||||||
@@ -442,6 +551,11 @@ function ChunkParsedResult() {
|
|||||||
onRefresh={refresh}
|
onRefresh={refresh}
|
||||||
docName={document?.name}
|
docName={document?.name}
|
||||||
onLocate={handleLocate}
|
onLocate={handleLocate}
|
||||||
|
selectedChunkId={selectedChunkId}
|
||||||
|
onSelect={(chunk) => {
|
||||||
|
setSelectedChunkId(chunk.chunk_id || '');
|
||||||
|
handleLocate(chunk);
|
||||||
|
}}
|
||||||
/>
|
/>
|
||||||
</Box>
|
</Box>
|
||||||
</Paper>
|
</Paper>
|
||||||
@@ -451,4 +565,4 @@ function ChunkParsedResult() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export default ChunkParsedResult;
|
export default ChunkParsedResult;
|
||||||
|
|||||||
@@ -7,6 +7,6 @@
|
|||||||
],
|
],
|
||||||
// exclude rag_web_core/**/*
|
// exclude ragflow_web/**/*
|
||||||
"exclude": [
|
"exclude": [
|
||||||
"rag_web_core/**"
|
"ragflow_web/**"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user