diff --git a/src/hooks/knowledge-hooks.ts b/src/hooks/knowledge-hooks.ts index 92f795e..2cd9820 100644 --- a/src/hooks/knowledge-hooks.ts +++ b/src/hooks/knowledge-hooks.ts @@ -3,6 +3,54 @@ import knowledgeService from '@/services/knowledge_service'; import type { IKnowledge, IKnowledgeResult } from '@/interfaces/database/knowledge'; import type { IFetchKnowledgeListRequestParams } from '@/interfaces/request/knowledge'; +/** +{ + "avatar": "data:image/png;base64,iVBORw0K...", + "chunk_num": 1180, + "create_time": 1759986452748, + "description": " 213213", + "doc_num": 16, + "embd_id": "", + "id": "dcc2871aa4cd11f08d4116ac85b1de0a", + "language": "English", + "name": "k1123", + "pagerank": 0, + "parser_config": { + "auto_keywords": 0, + "auto_questions": 0, + "chunk_token_num": 512, + "delimiter": "\n", + "graphrag": { + "entity_types": [ + "organization", + "person", + "geo", + "event", + "category" + ], + "method": "light", + "use_graphrag": true + }, + "html4excel": false, + "layout_recognize": "Plain Text", + "raptor": { + "max_cluster": 64, + "max_token": 256, + "prompt": "\u8bf7\u603b\u7ed3\u4ee5\u4e0b\u6bb5\u843d\u3002 \u5c0f\u5fc3\u6570\u5b57\uff0c\u4e0d\u8981\u7f16\u9020\u3002 \u6bb5\u843d\u5982\u4e0b\uff1a\n {cluster_content}\n\u4ee5\u4e0a\u5c31\u662f\u4f60\u9700\u8981\u603b\u7ed3\u7684\u5185\u5bb9\u3002", + "random_seed": 0, + "threshold": 0.1, + "use_raptor": false + }, + "topn_tags": 3 + }, + "parser_id": "naive", + "permission": "team", + "size": 56819092, + "token_num": 293067, + "update_time": 1760436169574 +} + */ + // 知识库列表Hook状态接口 export interface UseKnowledgeListState { knowledgeBases: IKnowledge[]; diff --git a/src/interfaces/database/knowledge.ts b/src/interfaces/database/knowledge.ts index ba2f9f9..ecaf539 100644 --- a/src/interfaces/database/knowledge.ts +++ b/src/interfaces/database/knowledge.ts @@ -1,5 +1,56 @@ import type { RunningStatus } from '@/constants/knowledge'; + +/** +{ + "avatar": "data:image/png;base64,iVBORw0K...", + "chunk_num": 1180, + 
"create_time": 1759986452748, + "description": " 213213", + "doc_num": 16, + "embd_id": "", + "id": "dcc2871aa4cd11f08d4116ac85b1de0a", + "language": "English", + "name": "k1123", + "pagerank": 0, + "parser_config": { + "auto_keywords": 0, + "auto_questions": 0, + "chunk_token_num": 512, + "delimiter": "\n", + "graphrag": { + "entity_types": [ + "organization", + "person", + "geo", + "event", + "category" + ], + "method": "light", + "use_graphrag": true + }, + "html4excel": false, + "layout_recognize": "Plain Text", + "raptor": { + "max_cluster": 64, + "max_token": 256, + "prompt": "\u8bf7\u603b\u7ed3\u4ee5\u4e0b\u6bb5\u843d\u3002 \u5c0f\u5fc3\u6570\u5b57\uff0c\u4e0d\u8981\u7f16\u9020\u3002 \u6bb5\u843d\u5982\u4e0b\uff1a\n {cluster_content}\n\u4ee5\u4e0a\u5c31\u662f\u4f60\u9700\u8981\u603b\u7ed3\u7684\u5185\u5bb9\u3002", + "random_seed": 0, + "threshold": 0.1, + "use_raptor": false + }, + "topn_tags": 3 + }, + "parser_id": "naive", + "permission": "team", + "size": 56819092, + "token_num": 293067, + "update_time": 1760436169574 + } + */ + + + /** * 知识库接口定义 * 包含知识库的基本信息、配置和状态 @@ -26,7 +77,7 @@ export interface IKnowledge { /** 知识库名称 */ name: string; /** 解析器配置 */ - parser_config: ParserConfig; + parser_config: IParserConfig; /** 解析器ID */ parser_id: string; /** 管道ID */ @@ -83,17 +134,60 @@ export interface IKnowledgeResult { /** * Raptor配置接口 * 用于配置是否启用Raptor功能 + { + "max_cluster": 64, + "max_token": 256, + "prompt": "\u8bf7\u603b\u7ed3\u4ee5\u4e0b\u6bb5\u843d\u3002 + \u5c0f\u5fc3\u6570\u5b57\uff0c\u4e0d\u8981\u7f16\u9020\u3002 \u6bb5\u843d\u5982\u4e0b\uff1a\n + {cluster_content}\n\u4ee5\u4e0a\u5c31\u662f\u4f60\u9700\u8981\u603b\u7ed3\u7684\u5185\u5bb9\u3002", + "random_seed": 0, + "threshold": 0.1, + "use_raptor": false + } */ -export interface Raptor { +export interface IRaptor { /** 是否使用Raptor */ use_raptor: boolean; + /** 最大集群数,可选 */ + max_cluster?: number; + /** 最大令牌数,可选 */ + max_token?: number; + /** 提示模板,可选 */ + prompt?: string; + /** 随机种子,可选 */ + random_seed?: 
number; + /** 阈值,可选 */ + threshold?: number; +} + +/** + * GraphRAG配置接口 + * 用于配置是否启用GraphRAG功能 + { + "entity_types": [ + "organization", + "person", + "geo", + "event", + "category" + ], + "method": "light", + "use_graphrag": true + } + */ +export interface IGraphrag { + use_graphrag: boolean; + /** 实体类型列表,可选 */ + entity_types?: string[]; + /** 方法,可选 */ + method?: string; } /** * 解析器配置接口 * 定义文档解析的各种参数和选项 */ -export interface ParserConfig { +export interface IParserConfig { /** 起始页码,可选 */ from_page?: number; /** 结束页码,可选 */ @@ -111,13 +205,13 @@ export interface ParserConfig { /** 是否启用布局识别,可选 */ layout_recognize?: boolean; /** Raptor配置,可选 */ - raptor?: Raptor; + raptor?: IRaptor; /** 标签知识库ID列表,可选 */ tag_kb_ids?: string[]; /** 顶部标签数量,可选 */ topn_tags?: number; /** GraphRAG配置,可选 */ - graphrag?: { use_graphrag?: boolean }; + graphrag?: IGraphrag; } /** diff --git a/src/pages/knowledge/components/ChunkMethodForm.tsx b/src/pages/knowledge/components/ChunkMethodForm.tsx index 90e4457..486fd3e 100644 --- a/src/pages/knowledge/components/ChunkMethodForm.tsx +++ b/src/pages/knowledge/components/ChunkMethodForm.tsx @@ -12,11 +12,62 @@ import { FormHelperText, Button, CircularProgress, + Switch, + FormControlLabel, + Chip, + Slider, + Accordion, + AccordionSummary, + AccordionDetails, } from '@mui/material'; import { Save as SaveIcon, + ExpandMore as ExpandMoreIcon, + Add as AddIcon, } from '@mui/icons-material'; import { DOCUMENT_PARSER_TYPES, type DocumentParserType } from '@/constants/knowledge'; +import { type IParserConfig } from '@/interfaces/database/knowledge'; + +/** +{ + "kb_id": "dcc2871aa4cd11f08d4116ac85b1de0a", + "name": "k1123", + "description": " 213213", + "permission": "team", + "parser_id": "naive", + "embd_id": "", + "parser_config": { + "layout_recognize": "Plain Text", + "chunk_token_num": 512, + "delimiter": "\n", + "auto_keywords": 0, + "auto_questions": 0, + "html4excel": false, + "topn_tags": 3, + "raptor": { + "use_raptor": true, + "prompt": "请总结以下段落。 
小心数字,不要编造。 段落如下:\n {cluster_content}\n以上就是你需要总结的内容。", + "max_token": 256, + "threshold": 0.1, + "max_cluster": 64, + "random_seed": 0 + }, + "graphrag": { + "use_graphrag": true, + "entity_types": [ + "organization", + "person", + "geo", + "event", + "category" + ], + "method": "light" + } + }, + "pagerank": 0 +} + */ + // 解析器选项配置 const parserOptions = [ @@ -37,12 +88,8 @@ const parserOptions = [ { value: DOCUMENT_PARSER_TYPES.KnowledgeGraph, label: '知识图谱解析器', description: '构建知识图谱结构' }, ]; -export interface ConfigFormData { +export interface ConfigFormData extends IParserConfig { parser_id: DocumentParserType; - chunk_token_count?: number; - layout_recognize?: boolean; - task_page_size?: number; - [key: string]: any; } interface ChunkMethodFormProps { @@ -61,232 +108,432 @@ function ChunkMethodForm({ isSubmitting = false, onCancel, disabled = false, - submitButtonText = '提交', + submitButtonText = '保存', cancelButtonText = '取消' }: ChunkMethodFormProps) { const selectedParser: DocumentParserType = form.watch('parser_id'); + const [entityTypes, setEntityTypes] = React.useState(['organization', 'person', 'geo', 'event', 'category']); - // 根据选择的解析器显示不同的配置选项 - const renderParserSpecificConfig = () => { - switch (selectedParser) { - case DOCUMENT_PARSER_TYPES.Naive: - return ( - - + // 通用配置部分 + const renderGeneralConfig = () => ( + + }> + 通用 + + + + {/* 切片方法 */} + + + 切片方法 + + 选择适合您文档类型的解析器 + + + + {/* PDF解析器 */} + + + PDF解析器 + + + + + {/* 嵌入模型 */} + + + 嵌入模型 + + + + + {/* 建议文本块大小 */} + + 建议文本块大小 + + + + + 512 + + + + {/* 文本分段标识符 */} + + + + + + + ); + + // 页面排名配置部分 + const renderPageRankConfig = () => ( + + }> + 页面排名 + + + + {/* 页面排名 */} + + + 页面排名 + - - + + + + {/* 自动关键词提取 */} + + + 自动关键词提取 + - + - ); - case DOCUMENT_PARSER_TYPES.Table: - return ( - - - - 表格解析器会自动处理表格结构,无需额外配置。 - - - - ); - - case DOCUMENT_PARSER_TYPES.KnowledgeGraph: - return ( - - + {/* 自动问题提取 */} + + + 自动问题提取 + - + - ); - case DOCUMENT_PARSER_TYPES.Picture: - return ( - - - + + } + label="表格转HTML" + 
/> + + + {/* 标签集 */} + + + 标签集 + + - ); + + + + ); - default: - return ( - - - - 该解析器使用默认配置,无需额外设置。 - - + // RAPTOR集成配置部分 + const renderRaptorConfig = () => ( + + }> + 使用召回增强RAPTOR集成 + + + + {/* 启用RAPTOR */} + + + } + label="使用召回增强RAPTOR集成" + /> - ); - } - }; + + {/* 提示词 */} + + + + + {/* 最大token数 */} + + 最大token数 + + + + + 256 + + + + {/* 阈值 */} + + 阈值 + + + + + 0.1 + + + + {/* 最大聚类数 */} + + 最大聚类数 + + + + + 64 + + + + {/* 随机种子 */} + + + + + + ) + }} + /> + + + + + ); + + // 提取知识图谱配置部分 + const renderGraphRagConfig = () => ( + + }> + 提取知识图谱 + + + + {/* 启用知识图谱 */} + + + } + label="提取知识图谱" + /> + + + {/* 添加实体类型 */} + + + + + {/* 实体类型标签 */} + + 实体类型 + + {entityTypes.map((type, index) => ( + { + setEntityTypes(prev => prev.filter((_, i) => i !== index)); + }} + disabled={disabled} + /> + ))} + + + + {/* 方法 */} + + + 方法 + + + + + {/* 实体归一化 */} + + } + label="实体归一化" + /> + + + {/* 社区报告生成 */} + + } + label="社区报告生成" + /> + + + + + ); return ( - - 解析配置 + + 配置 + + + 在这里更新您的知识库详细信息,并更改切片方法。 - - {/* 解析器选择 */} - - - 解析器类型 - - - 选择适合您文档类型的解析器 - - - + + {renderGeneralConfig()} + {renderPageRankConfig()} + {renderRaptorConfig()} + {renderGraphRagConfig()} + - {/* 解析器特定配置 */} - - - - 解析器配置 - - {renderParserSpecificConfig()} - - - - {/* 通用配置 */} - - - - 通用配置 - - - - - - - - - - - - - {/* 操作按钮 */} - - - {onCancel && ( - - )} - - - - + {/* 操作按钮 */} + + {onCancel && ( + + )} + + ); }