feat(knowledge): add chunk management and document processing features

This commit is contained in:
2025-10-16 16:23:53 +08:00
parent 4f956e79ba
commit 5a0a9ef2a1
17 changed files with 1655 additions and 366 deletions

View File

@@ -0,0 +1,247 @@
import React from 'react';
import {
Box,
Paper,
Typography,
Grid,
Card,
CardContent,
Chip,
Stack,
Pagination,
CircularProgress,
Alert,
} from '@mui/material';
import type { IChunk } from '@/interfaces/database/knowledge';
/**
 * Props accepted by the ChunkListResult component.
 */
interface ChunkListResultProps {
  // --- Request state -------------------------------------------------------

  /** While true, the component renders only a progress spinner. */
  loading: boolean;
  /** When non-empty, the component renders only an error alert showing this message. */
  error: string | null;

  // --- Data ----------------------------------------------------------------

  /** Chunks of the current page; each one is rendered as a card. */
  chunks: IChunk[];
  /** Total chunk count shown in the summary and used to derive the page count. */
  total: number;
  /** Optional document name displayed in the summary header. */
  docName?: string;

  // --- Pagination ----------------------------------------------------------

  /** Current page number (1-based: chunk ordinals start at `(page - 1) * pageSize + 1`). */
  page: number;
  /** Number of chunks per page. */
  pageSize: number;
  /** Invoked with the newly selected page number when the pagination control changes. */
  onPageChange: (page: number) => void;
}
/**
 * Renders one captioned group of keyword chips inside a chunk card.
 *
 * @param caption  Text shown above the chips.
 * @param keywords Values to render, one chip per entry.
 * @param color    MUI color applied to every chip in the group.
 * @returns The group element, or `null` when there are no keywords to show.
 */
function renderKeywordGroup(
  caption: string,
  keywords: readonly React.ReactNode[] | null | undefined,
  color: 'primary' | 'secondary' | 'info',
): React.ReactNode {
  if (!keywords || keywords.length === 0) {
    return null;
  }

  return (
    <Box sx={{ mt: 2 }}>
      <Typography variant="caption" color="text.secondary" sx={{ mb: 1, display: 'block' }}>
        {caption}
      </Typography>
      <Box sx={{ display: 'flex', flexWrap: 'wrap', gap: 0.5 }}>
        {keywords.map((keyword, kwdIndex) => (
          <Chip
            key={kwdIndex}
            label={keyword}
            size="small"
            variant="outlined"
            color={color}
          />
        ))}
      </Box>
    </Box>
  );
}

/**
 * Displays one page of document chunks: a summary panel, a card per chunk and
 * a pagination control.
 *
 * Rendering is decided in this order:
 *  1. `loading` is true   -> spinner only
 *  2. `error` is truthy   -> error alert only
 *  3. `chunks` is empty   -> empty-state message
 *  4. otherwise           -> summary + chunk cards (+ pagination when more than one page)
 *
 * @param props See {@link ChunkListResultProps}.
 */
function ChunkListResult(props: ChunkListResultProps) {
  const { chunks, total, loading, error, page, pageSize, onPageChange, docName } = props;

  if (loading) {
    return (
      <Paper sx={{ p: 3, textAlign: 'center' }}>
        <CircularProgress />
        <Typography variant="body2" color="text.secondary" sx={{ mt: 2 }}>
          chunk数据...
        </Typography>
      </Paper>
    );
  }

  if (error) {
    return (
      <Paper sx={{ p: 3 }}>
        <Alert severity="error">
          {error}
        </Alert>
      </Paper>
    );
  }

  if (!chunks || chunks.length === 0) {
    return (
      <Paper sx={{ p: 3, textAlign: 'center' }}>
        <Typography variant="h6" color="text.secondary">
          chunk数据
        </Typography>
        <Typography variant="body2" color="text.secondary" sx={{ mt: 1 }}>
          chunk数据
        </Typography>
      </Paper>
    );
  }

  // Guard against a zero/negative page size: dividing by it would produce
  // Infinity or NaN, which must never reach the Pagination `count` prop.
  // The chunks we do have are then treated as a single page.
  const totalPages = pageSize > 0 ? Math.ceil(total / pageSize) : 1;

  // NOTE: this counts enabled chunks among the ones on the CURRENT page only,
  // whereas `total` covers the whole document.
  const enabledOnPage = chunks.filter((chunk) => chunk.available_int === 1).length;

  // Ordinal of the first chunk on this page (pages are 1-based).
  const firstOrdinal = (page - 1) * pageSize + 1;

  return (
    <Box>
      {/* Chunk result summary */}
      <Paper sx={{ p: 3, mb: 3 }}>
        <Typography variant="h6" gutterBottom>
          Chunk详情
        </Typography>
        {docName && (
          <Typography variant="body2" color="text.secondary" sx={{ mb: 2 }}>
            : {docName}
          </Typography>
        )}
        <Grid container spacing={2}>
          <Grid size={{ xs: 12, sm: 6 }}>
            <Card>
              <CardContent>
                <Typography variant="h4" color="primary">
                  {total}
                </Typography>
                <Typography variant="body2" color="text.secondary">
                  Chunk数量
                </Typography>
              </CardContent>
            </Card>
          </Grid>
          <Grid size={{ xs: 12, sm: 6 }}>
            <Card>
              <CardContent>
                <Typography variant="h4" color="secondary">
                  {enabledOnPage}
                </Typography>
                <Typography variant="body2" color="text.secondary">
                  Chunk
                </Typography>
              </CardContent>
            </Card>
          </Grid>
        </Grid>
      </Paper>

      {/* Chunk list */}
      <Paper sx={{ p: 3, mb: 3 }}>
        <Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', mb: 2 }}>
          <Typography variant="h6">
            Chunk列表 ( {page} {totalPages} )
          </Typography>
          <Typography variant="body2" color="text.secondary">
            {total} chunk
          </Typography>
        </Box>
        <Grid container spacing={2}>
          {chunks.map((chunk, index) => (
            <Grid size={12} key={chunk.chunk_id}>
              <Card variant="outlined">
                <CardContent>
                  <Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start', mb: 2 }}>
                    <Typography variant="subtitle1" fontWeight="bold">
                      Chunk #{firstOrdinal + index}
                    </Typography>
                    <Stack direction="row" spacing={1}>
                      <Chip
                        label={chunk.available_int === 1 ? '已启用' : '未启用'}
                        size="small"
                        color={chunk.available_int === 1 ? 'success' : 'default'}
                      />
                      {chunk.image_id && (
                        <Chip
                          label="包含图片"
                          size="small"
                          color="info"
                          variant="outlined"
                        />
                      )}
                    </Stack>
                  </Box>
                  <Typography
                    variant="body2"
                    sx={{
                      mb: 2,
                      maxHeight: '200px',
                      overflow: 'auto',
                      whiteSpace: 'pre-wrap',
                      backgroundColor: 'grey.50',
                      p: 2,
                      borderRadius: 1,
                    }}
                  >
                    {chunk.content_with_weight || '无内容'}
                  </Typography>
                  {renderKeywordGroup(':', chunk.important_kwd, 'primary')}
                  {renderKeywordGroup(':', chunk.question_kwd, 'secondary')}
                  {renderKeywordGroup(':', chunk.tag_kwd, 'info')}
                  {chunk.positions && chunk.positions.length > 0 && (
                    <Box sx={{ mt: 2 }}>
                      <Typography variant="caption" color="text.secondary">
                        : {chunk.positions.length}
                      </Typography>
                    </Box>
                  )}
                </CardContent>
              </Card>
            </Grid>
          ))}
        </Grid>

        {/* Pagination control, only shown when there is more than one page */}
        {totalPages > 1 && (
          <Box sx={{ display: 'flex', justifyContent: 'center', mt: 3 }}>
            <Pagination
              count={totalPages}
              page={page}
              onChange={(_, newPage) => onPageChange(newPage)}
              color="primary"
              showFirstButton
              showLastButton
            />
          </Box>
        )}
      </Paper>
    </Box>
  );
}

export default ChunkListResult;

View File

@@ -0,0 +1,269 @@
import React, { useState, useEffect } from 'react';
import { useSearchParams, useNavigate } from "react-router-dom";
import {
Box,
Typography,
Breadcrumbs,
Link,
TextField,
InputAdornment,
Paper,
Alert,
Card,
CardContent,
CardMedia
} from "@mui/material";
import { Search as SearchIcon, ArrowBack as ArrowBackIcon } from '@mui/icons-material';
import { useChunkList } from '@/hooks/chunk-hooks';
import ChunkListResult from './components/ChunkListResult';
import knowledgeService from '@/services/knowledge_service';
import type { IKnowledge, IKnowledgeFile } from '@/interfaces/database/knowledge';
import type { IDocumentInfo } from '@/interfaces/database/document';
/** File extensions (lower-case, without the dot) that are previewed inline as images. */
const IMAGE_EXTENSIONS: ReadonlySet<string> = new Set([
  'jpg',
  'jpeg',
  'png',
  'gif',
  'bmp',
  'webp',
  'svg',
]);

/**
 * Extracts the lower-cased extension from a file name.
 *
 * @param fileName File name to inspect; may be missing.
 * @returns The text after the last dot, lower-cased, or `''` when the name is
 *          missing, contains no dot, starts with its only dot, or ends with a dot.
 */
function getFileExtension(fileName: string | null | undefined): string {
  if (!fileName) {
    return '';
  }
  const dotIndex = fileName.lastIndexOf('.');
  if (dotIndex <= 0 || dotIndex === fileName.length - 1) {
    return '';
  }
  return fileName.slice(dotIndex + 1).toLowerCase();
}

/**
 * Page component showing the parsed chunks of a single document.
 *
 * Reads `kb_id` and `doc_id` from the URL query string, loads the knowledge
 * base detail, the document info and the raw document file, and renders:
 * breadcrumbs, a title, a file preview (image / PDF / download link), a search
 * box and the paginated chunk list.
 *
 * When either id is missing, only an error alert is rendered.
 */
function ChunkParsedResult() {
  const [searchParams] = useSearchParams();
  const navigate = useNavigate();
  const kb_id = searchParams.get('kb_id');
  const doc_id = searchParams.get('doc_id');

  const [knowledgeBase, setKnowledgeBase] = useState<IKnowledge | null>(null);
  // Named `docInfo` rather than `document` so the global DOM `document` is not shadowed.
  const [docInfo, setDocInfo] = useState<IKnowledgeFile | null>(null);
  const [fileUrl, setFileUrl] = useState<string>('');
  const [searchKeyword, setSearchKeyword] = useState('');

  // Chunk list hook
  const {
    chunks,
    total,
    loading,
    error,
    currentPage,
    pageSize,
    setCurrentPage,
    setKeywords,
  } = useChunkList(doc_id || '', {
    page: 1,
    size: 10,
    keywords: searchKeyword,
  });

  /**
   * Applies a new search keyword: mirrors it into the text field state,
   * forwards it to the chunk list hook and jumps back to the first page.
   */
  const handleSearch = (keyword: string) => {
    setSearchKeyword(keyword);
    setKeywords(keyword);
    setCurrentPage(1);
  };

  // Load knowledge base, document info and document file whenever the ids change.
  useEffect(() => {
    if (!kb_id || !doc_id) {
      return undefined;
    }

    const kbId = kb_id;
    const docId = doc_id;

    // Set by the cleanup so that late responses from a previous run are ignored.
    let cancelled = false;
    // The object URL created by THIS effect run. It is tracked locally because
    // the `fileUrl` state seen by the cleanup closure would be stale.
    let objectUrl: string | null = null;

    // Any URL from a previous run has been revoked by its cleanup; drop it so
    // the preview never points at a dead blob URL.
    setFileUrl('');

    const fetchData = async () => {
      try {
        // Knowledge base detail
        const kbResponse = await knowledgeService.getKnowledgeDetail({ kb_id: kbId });
        if (cancelled) return;
        if (kbResponse.data.code === 0) {
          setKnowledgeBase(kbResponse.data.data);
        }

        // Document info
        const docResponse = await knowledgeService.getDocumentInfos({ doc_ids: [docId] });
        if (cancelled) return;
        if (docResponse.data.code === 0) {
          const docArr: IKnowledgeFile[] = docResponse.data.data;
          const [firstDoc] = docArr;
          if (firstDoc) {
            setDocInfo(firstDoc);
          }
        }

        // Document file (binary)
        const fileResponse = await knowledgeService.getDocumentFile({ doc_id: docId });
        if (cancelled) return;
        if (fileResponse.data) {
          // Create an object URL for previewing / downloading the file.
          objectUrl = URL.createObjectURL(fileResponse.data);
          setFileUrl(objectUrl);
        }
      } catch (fetchError) {
        if (!cancelled) {
          console.error('Failed to fetch data:', fetchError);
        }
      }
    };

    void fetchData();

    // Cleanup: stop applying results and release the object URL.
    return () => {
      cancelled = true;
      if (objectUrl) {
        URL.revokeObjectURL(objectUrl);
      }
    };
  }, [kb_id, doc_id]);

  /**
   * Renders the file preview card: an inline image for image files, an iframe
   * for PDFs, and a download link for anything else. Returns `null` until both
   * the document info and the file URL are available.
   */
  const renderFilePreview = () => {
    if (!docInfo || !fileUrl) return null;

    const fileExtension = getFileExtension(docInfo.name);

    // Image preview
    if (IMAGE_EXTENSIONS.has(fileExtension)) {
      return (
        <Card sx={{ mb: 3 }}>
          <CardContent>
            <Typography variant="h6" gutterBottom>
            </Typography>
            <CardMedia
              component="img"
              sx={{
                maxHeight: 400,
                objectFit: 'contain',
                border: '1px solid #e0e0e0',
                borderRadius: 1
              }}
              image={fileUrl}
              alt={docInfo.name}
            />
          </CardContent>
        </Card>
      );
    }

    // PDF preview
    if (fileExtension === 'pdf') {
      return (
        <Card sx={{ mb: 3 }}>
          <CardContent>
            <Typography variant="h6" gutterBottom>
              PDF预览
            </Typography>
            <Box sx={{ height: 600, border: '1px solid #e0e0e0', borderRadius: 1 }}>
              <iframe
                src={fileUrl}
                width="100%"
                height="100%"
                style={{ border: 'none' }}
                title={docInfo.name}
              />
            </Box>
          </CardContent>
        </Card>
      );
    }

    // Any other file type: show file details and a download link
    return (
      <Card sx={{ mb: 3 }}>
        <CardContent>
          <Typography variant="h6" gutterBottom>
          </Typography>
          <Typography variant="body2" color="text.secondary" gutterBottom>
            : {docInfo.name}
          </Typography>
          <Typography variant="body2" color="text.secondary" gutterBottom>
            : {fileExtension.toUpperCase() || '未知'}
          </Typography>
          <Link
            href={fileUrl}
            download={docInfo.name}
            sx={{ mt: 2, display: 'inline-block' }}
          >
          </Link>
        </CardContent>
      </Card>
    );
  };

  if (!kb_id || !doc_id) {
    return (
      <Box sx={{ p: 3 }}>
        <Alert severity="error">
          ID或文档ID
        </Alert>
      </Box>
    );
  }

  return (
    <Box sx={{ p: 3 }}>
      {/* Breadcrumb navigation */}
      <Box sx={{ mb: 3 }}>
        <Breadcrumbs>
          <Link
            color="inherit"
            href="#"
            onClick={(e) => {
              e.preventDefault();
              navigate('/knowledge');
            }}
            sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}
          >
          </Link>
          <Link
            color="inherit"
            href="#"
            onClick={(e) => {
              e.preventDefault();
              navigate(`/knowledge/${kb_id}`);
            }}
          >
            {knowledgeBase?.name || '知识库详情'}
          </Link>
          <Typography color="text.primary">
            {docInfo?.name || '文档Chunk详情'}
          </Typography>
        </Breadcrumbs>
      </Box>

      {/* Page title */}
      <Paper sx={{ p: 3, mb: 3 }}>
        <Typography variant="h4" gutterBottom>
          Chunk解析结果
        </Typography>
        <Typography variant="body1" color="text.secondary">
          "{docInfo?.name}" chunk数据
        </Typography>
      </Paper>

      {/* File preview */}
      {renderFilePreview()}

      {/* Search box */}
      <Paper sx={{ p: 3, mb: 3 }}>
        <TextField
          fullWidth
          placeholder="搜索chunk内容..."
          value={searchKeyword}
          onChange={(event) => handleSearch(event.target.value)}
          InputProps={{
            startAdornment: (
              <InputAdornment position="start">
                <SearchIcon />
              </InputAdornment>
            ),
          }}
        />
      </Paper>

      {/* Chunk list result */}
      <ChunkListResult
        chunks={chunks}
        total={total}
        loading={loading}
        error={error}
        page={currentPage}
        pageSize={pageSize}
        onPageChange={setCurrentPage}
        docName={docInfo?.name}
      />
    </Box>
  );
}

export default ChunkParsedResult;