From 74c4f798a5b723ea148a9105d26f5c31236ea8f4 Mon Sep 17 00:00:00 2001 From: "guangfei.zhao" Date: Mon, 3 Nov 2025 17:47:33 +0800 Subject: [PATCH] feat(knowledge): add RAPTOR and Knowledge Graph config sections to parsers refactor(knowledge): reorganize parser configuration components into accordions --- .gitignore | 3 +- src/assets/svg/data-flow/data-icon-bri.svg | 15 + src/assets/svg/data-flow/data-icon.svg | 15 + .../svg/data-flow/processing-icon-bri.svg | 6 + src/assets/svg/data-flow/processing-icon.svg | 6 + .../svg/data-flow/total-files-icon-bri.svg | 6 + src/assets/svg/data-flow/total-files-icon.svg | 6 + src/assets/svg/llm/cometapi.svg | 6 + src/assets/svg/llm/deerapi.svg | 5 + src/assets/svg/llm/modelscope.svg | 2 +- src/assets/svg/llm/siliconflow.svg | 4 +- src/assets/svg/llm/token-pony.svg | 8 + src/assets/svg/rerun.svg | 1 + src/interfaces/database/knowledge.ts | 114 +++-- src/locales/en.ts | 4 +- src/locales/zh.ts | 2 + src/pages/Home.tsx | 6 +- .../knowledge/components/ChunkMethodForm.tsx | 1 - .../knowledge/components/GeneralForm.tsx | 28 +- src/pages/knowledge/configuration/audio.tsx | 72 ++- src/pages/knowledge/configuration/book.tsx | 103 +++-- .../knowledge/configuration/common-items.tsx | 437 +++++++++++------- .../configuration-form-container.tsx | 5 +- src/pages/knowledge/configuration/email.tsx | 51 +- src/pages/knowledge/configuration/index.tsx | 6 +- .../configuration/knowledge-graph.tsx | 125 +++-- src/pages/knowledge/configuration/laws.tsx | 51 +- src/pages/knowledge/configuration/manual.tsx | 96 ++-- src/pages/knowledge/configuration/naive.tsx | 91 +--- src/pages/knowledge/configuration/one.tsx | 107 +++-- src/pages/knowledge/configuration/paper.tsx | 104 +++-- src/pages/knowledge/configuration/picture.tsx | 51 +- .../knowledge/configuration/presentation.tsx | 64 ++- src/pages/knowledge/configuration/qa.tsx | 49 +- src/pages/knowledge/configuration/resume.tsx | 51 +- src/pages/knowledge/configuration/table.tsx | 51 +- src/pages/knowledge/configuration/tag.tsx | 53 ++- src/pages/knowledge/create.tsx | 18 +- src/pages/knowledge/setting.tsx | 5 + src/utils/request.ts | 40 +- 40 files changed, 1251 insertions(+), 617 deletions(-) create mode 100644 src/assets/svg/data-flow/data-icon-bri.svg create mode 100644 src/assets/svg/data-flow/data-icon.svg create mode 100644 src/assets/svg/data-flow/processing-icon-bri.svg create mode 100644 src/assets/svg/data-flow/processing-icon.svg create mode 100644 src/assets/svg/data-flow/total-files-icon-bri.svg create mode 100644 src/assets/svg/data-flow/total-files-icon.svg create mode 100644 src/assets/svg/llm/cometapi.svg create mode 100644 src/assets/svg/llm/deerapi.svg create mode 100644 src/assets/svg/llm/token-pony.svg create mode 100644 src/assets/svg/rerun.svg diff --git a/.gitignore b/.gitignore index 15ee375..8bff160 100644 --- a/.gitignore +++ b/.gitignore @@ -24,5 +24,4 @@ dist-ssr *.sw? # rag core -rag_web_core_v0.20.5 -rag_web_core_deprecated \ No newline at end of file +ragflow_core_v0.21.1 \ No newline at end of file diff --git a/src/assets/svg/data-flow/data-icon-bri.svg b/src/assets/svg/data-flow/data-icon-bri.svg new file mode 100644 index 0000000..355ea90 --- /dev/null +++ b/src/assets/svg/data-flow/data-icon-bri.svg @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/src/assets/svg/data-flow/data-icon.svg b/src/assets/svg/data-flow/data-icon.svg new file mode 100644 index 0000000..eddb6a3 --- /dev/null +++ b/src/assets/svg/data-flow/data-icon.svg @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/src/assets/svg/data-flow/processing-icon-bri.svg b/src/assets/svg/data-flow/processing-icon-bri.svg new file mode 100644 index 0000000..96c8e76 --- /dev/null +++ b/src/assets/svg/data-flow/processing-icon-bri.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/assets/svg/data-flow/processing-icon.svg b/src/assets/svg/data-flow/processing-icon.svg new file mode 100644 index 0000000..46acc8c --- /dev/null +++ b/src/assets/svg/data-flow/processing-icon.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/assets/svg/data-flow/total-files-icon-bri.svg b/src/assets/svg/data-flow/total-files-icon-bri.svg new file mode 100644 index 0000000..795b5f5 --- /dev/null +++ b/src/assets/svg/data-flow/total-files-icon-bri.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/assets/svg/data-flow/total-files-icon.svg b/src/assets/svg/data-flow/total-files-icon.svg new file mode 100644 index 0000000..702e309 --- /dev/null +++ b/src/assets/svg/data-flow/total-files-icon.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/assets/svg/llm/cometapi.svg b/src/assets/svg/llm/cometapi.svg new file mode 100644 index 0000000..8d98118 --- /dev/null +++ b/src/assets/svg/llm/cometapi.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/assets/svg/llm/deerapi.svg b/src/assets/svg/llm/deerapi.svg new file mode 100644 index 0000000..0655cd5 --- /dev/null +++ b/src/assets/svg/llm/deerapi.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/svg/llm/modelscope.svg b/src/assets/svg/llm/modelscope.svg index cdeb9e5..8b3778f 100644 --- a/src/assets/svg/llm/modelscope.svg +++ b/src/assets/svg/llm/modelscope.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/src/assets/svg/llm/siliconflow.svg b/src/assets/svg/llm/siliconflow.svg index 544672f..c678506 100644 --- a/src/assets/svg/llm/siliconflow.svg +++ b/src/assets/svg/llm/siliconflow.svg @@ -1,5 +1,5 @@ - + - + \ No newline at end of file diff --git a/src/assets/svg/llm/token-pony.svg b/src/assets/svg/llm/token-pony.svg new file mode 100644 index 0000000..a504e22 --- /dev/null +++ b/src/assets/svg/llm/token-pony.svg @@ -0,0 +1,8 @@ + + + logo2 + + + + + \ No newline at end of file diff --git a/src/assets/svg/rerun.svg b/src/assets/svg/rerun.svg new file mode 100644 index 0000000..cd972f4 --- /dev/null +++ b/src/assets/svg/rerun.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/src/interfaces/database/knowledge.ts b/src/interfaces/database/knowledge.ts index f354dec..f8a6022 100644 --- a/src/interfaces/database/knowledge.ts +++ b/src/interfaces/database/knowledge.ts @@ -4,50 +4,68 @@ import type { IDocumentInfo } from './document'; /** { - "avatar": " - "chunk_num": 1180, - "create_time": 1759986452748, - "description": " 213213", - "doc_num": 16, - "embd_id": "", - "id": "dcc2871aa4cd11f08d4116ac85b1de0a", - "language": "English", - "name": "k1123", - "pagerank": 0, - "parser_config": { - "auto_keywords": 0, - "auto_questions": 0, - "chunk_token_num": 512, - "delimiter": "\n", - "graphrag": { - "entity_types": [ - "organization", - "person", - "geo", - "event", - "category" - ], - "method": "light", - "use_graphrag": true - }, - "html4excel": false, - "layout_recognize": "Plain Text", - "raptor": { - "max_cluster": 64, - "max_token": 256, - "prompt": "\u8bf7\u603b\u7ed3\u4ee5\u4e0b\u6bb5\u843d\u3002 \u5c0f\u5fc3\u6570\u5b57\uff0c\u4e0d\u8981\u7f16\u9020\u3002 \u6bb5\u843d\u5982\u4e0b\uff1a\n {cluster_content}\n\u4ee5\u4e0a\u5c31\u662f\u4f60\u9700\u8981\u603b\u7ed3\u7684\u5185\u5bb9\u3002", - "random_seed": 0, - "threshold": 0.1, - "use_raptor": false - }, - "topn_tags": 3 - }, - "parser_id": "naive", - "permission": "team", - "size": 56819092, - "token_num": 293067, - "update_time": 1760436169574 - } + "code": 0, + "data": { + "avatar": null, + "chunk_num": 1, + "create_date": "Mon, 03 Nov 2025 10:39:46 GMT", + "create_time": 1762137586655, + "created_by": "cd77706ca4d811f0876d42010a8e0002", + "description": " ", + "doc_num": 2, + "embd_id": "qwen3-embedding:4b@Ollama#cd77706ca4d811f0876d42010a8e0002", + "graphrag_task_finish_at": null, + "graphrag_task_id": null, + "id": "5c7b2afeb85e11f09ce242010a8e0004", + "language": "English", + "mindmap_task_finish_at": null, + "mindmap_task_id": null, + "name": "k1123", + "pagerank": 0, + "parser_config": { + "auto_keywords": 0, + "auto_questions": 0, + "chunk_token_num": 512, + "delimiter": "\n", + "graphrag": { + "entity_types": [ + "organization", + "person", + "geo", + "event", + "category" + ], + "method": "light", + "use_graphrag": true + }, + "html4excel": false, + "layout_recognize": "DeepDOC", + "raptor": { + "max_cluster": 64, + "max_token": 256, + "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.", + "random_seed": 0, + "threshold": 0.1, + "use_raptor": true + }, + "toc_extraction": false, + "topn_tags": 3 + }, + "parser_id": "naive", + "permission": "me", + "pipeline_id": "", + "raptor_task_finish_at": null, + "raptor_task_id": null, + "similarity_threshold": 0.2, + "status": "1", + "tenant_id": "cd77706ca4d811f0876d42010a8e0002", + "token_num": 256, + "update_date": "Mon, 03 Nov 2025 16:30:55 GMT", + "update_time": 1762158655195, + "vector_similarity_weight": 0.3 + }, + "message": "success" +} */ @@ -61,6 +79,8 @@ export interface IKnowledge { avatar?: any; /** 知识库中的文档块数量 */ chunk_num: number; + /** 知识库Pagerank值 */ + pagerank: number; /** 创建日期(字符串格式) */ create_date: string; /** 创建时间戳 */ @@ -119,6 +139,10 @@ export interface IKnowledge { mindmap_task_finish_at?: string; /** 思维导图任务ID,可选 */ mindmap_task_id?: string; + /** GraphRAG任务完成时间,可选 */ + graphrag_task_finish_at?: string; + /** GraphRAG任务ID,可选 */ + graphrag_task_id?: string; } /** @@ -213,6 +237,8 @@ export interface IParserConfig { topn_tags?: number; /** GraphRAG配置,可选 */ graphrag?: IGraphrag; + /** 目录提取配置,可选 */ + toc_extraction?: boolean; } /** diff --git a/src/locales/en.ts b/src/locales/en.ts index 3904b20..2fd394a 100644 --- a/src/locales/en.ts +++ b/src/locales/en.ts @@ -206,8 +206,10 @@ export default { autoQuestions: 'Auto Questions Extraction', enterQuestionCount: 'Enter question count', pdfParser: 'PDF Parser', - plainText: 'Plain Text', + plainText: 'Naive', + minerU: 'MinerU', experimental: 'Experimental', + tocEnhance: 'TOC Enhance', delimiter: 'Delimiter', enterDelimiter: 'Enter delimiter', embeddingModel: 'Embedding Model', diff --git a/src/locales/zh.ts b/src/locales/zh.ts index 8a91be0..7a1dc8e 100644 --- a/src/locales/zh.ts +++ b/src/locales/zh.ts @@ -191,6 +191,8 @@ export default { enterQuestionCount: '输入问题数量', pdfParser: 'PDF解析器', plainText: '纯文本', + minerU: 'MinerU', + tocEnhance: 'TOC Enhance', experimental: '实验性', delimiter: '分隔符', enterDelimiter: '请输入分隔符', diff --git a/src/pages/Home.tsx b/src/pages/Home.tsx index aa83494..393c23b 100644 --- a/src/pages/Home.tsx +++ b/src/pages/Home.tsx @@ -175,9 +175,9 @@ const Home = () => { Model Overview - Embedding Model: text-embedding-3-large - Generator: gpt-4o-mini - Reranker: cross-encoder-v2 + Embedding Model: text-embedding-3-large + Generator: gpt-4o-mini + Reranker: cross-encoder-v2 Chunking: 512 tokens Retriever Top-K: 8 diff --git a/src/pages/knowledge/components/ChunkMethodForm.tsx b/src/pages/knowledge/components/ChunkMethodForm.tsx index 18898e1..d7df034 100644 --- a/src/pages/knowledge/components/ChunkMethodForm.tsx +++ b/src/pages/knowledge/components/ChunkMethodForm.tsx @@ -20,7 +20,6 @@ import { PresentationConfiguration, OneConfiguration, TagConfiguration, - ChunkMethodItem, } from '../configuration'; // 配置组件映射表 diff --git a/src/pages/knowledge/components/GeneralForm.tsx b/src/pages/knowledge/components/GeneralForm.tsx index 690596c..11467e8 100644 --- a/src/pages/knowledge/components/GeneralForm.tsx +++ b/src/pages/knowledge/components/GeneralForm.tsx @@ -18,6 +18,8 @@ import { Delete as DeleteIcon, } from '@mui/icons-material'; import { useTranslation } from 'react-i18next'; +import { EmbeddingModelItem } from '../configuration'; +import { PageRankItem, TagsItem } from '../configuration/common-items'; interface GeneralFormProps { form?: UseFormReturn; @@ -37,7 +39,7 @@ function GeneralForm({ cancelButtonText, }: GeneralFormProps = {}) { const { t } = useTranslation(); - + const defaultSubmitButtonText = submitButtonText || t('common.save'); const defaultCancelButtonText = cancelButtonText || t('common.cancel'); // 优先使用props传递的form,否则使用FormProvider的context @@ -47,9 +49,9 @@ function GeneralForm({ } catch (error) { contextForm = null; } - + const form = propForm || contextForm; - + if (!form) { console.error('GeneralForm: No form context found. Component must be used within a FormProvider or receive a form prop.'); return ( @@ -60,7 +62,7 @@ function GeneralForm({ ); } - + const { control, watch, setValue, handleSubmit } = form; const fileInputRef = useRef(null); @@ -94,7 +96,7 @@ function GeneralForm({ - + {!avatar && } - +