feat: Migrate document parsing to Aliyun and update embedding configurations
- Updated LocalDocumentParser to include raw_layouts and artifact_prefix from settings.
- Added new documents with failure reasons and metadata to documents.json for better error tracking.
- Created a new documentation file detailing the Aliyun ingest implementation process.
- Updated RFC to reflect changes in the parsing backend and embedding dimensions.
- Modified tests to accommodate the new embedding dimension of 1024 and updated parser and chunk builder assertions.
- Verified migration configurations to ensure correct settings for embedding model and backend.
This commit is contained in:
15
.env.example
15
.env.example
@@ -9,12 +9,12 @@ DEBUG=false
|
||||
# ===== Milvus向量数据库配置 =====
|
||||
MILVUS_HOST=localhost
|
||||
MILVUS_PORT=19530
|
||||
MILVUS_COLLECTION=regulations_dense_1536
|
||||
MILVUS_COLLECTION=regulations_dense_1024_v1
|
||||
MILVUS_DB_NAME=default
|
||||
|
||||
# ===== 嵌入模型配置 =====
|
||||
EMBEDDING_MODEL=text-embedding-v3
|
||||
EMBEDDING_DIM=1536
|
||||
EMBEDDING_DIM=1024
|
||||
EMBEDDING_API_KEY=your_embedding_api_key_here
|
||||
EMBEDDING_BASE_URL=http://6.86.80.4:30080/v1
|
||||
EMBEDDING_TIMEOUT_SECONDS=120
|
||||
@@ -44,11 +44,20 @@ CHUNK_SIZE=512
|
||||
CHUNK_OVERLAP=50
|
||||
MAX_FILE_SIZE_MB=100
|
||||
DOCUMENT_METADATA_PATH=backend/data/documents.json
|
||||
PARSER_BACKEND=aliyun
|
||||
CHUNK_BACKEND=aliyun
|
||||
|
||||
# ===== 阿里云文档解析 =====
|
||||
ALIBABA_ACCESS_KEY_ID=your_aliyun_access_key_id
|
||||
ALIBABA_ACCESS_KEY_SECRET=your_aliyun_access_key_secret
|
||||
ALIBABA_ENDPOINT=docmind-api.cn-hangzhou.aliyuncs.com
|
||||
ALIYUN_PARSE_POLL_INTERVAL_SECONDS=5
|
||||
ALIYUN_PARSE_TIMEOUT_SECONDS=900
|
||||
ALIYUN_PARSE_LAYOUT_STEP_SIZE=50
|
||||
ALIYUN_LLM_ENHANCEMENT=true
|
||||
ALIYUN_ENHANCEMENT_MODE=VLM
|
||||
DOCUMENT_PARSE_ARTIFACT_PREFIX=artifacts
|
||||
PARSER_FAILURE_MODE=fail
|
||||
|
||||
# ===== API服务配置 =====
|
||||
API_HOST=0.0.0.0
|
||||
@@ -73,7 +82,7 @@ DEEPSEEK_BASE_URL=http://6.86.80.4:30080/v1
|
||||
# Qwen系列: qwen3.5-plus, qwen3-plus, qwen-max, qwen-turbo, qwen-long
|
||||
# Qwen VL系列: qwen3-vl-plus, qwen-vl-max
|
||||
# DeepSeek系列: deepseek-v4-flash, deepseek-v3.2, deepseek-v3, deepseek-chat, deepseek-coder
|
||||
QWEN_MODEL=qwen3.5-plus
|
||||
QWEN_MODEL=qwen3.6-plus
|
||||
QWEN_VL_MODEL=qwen3-vl-plus
|
||||
DEEPSEEK_MODEL=deepseek-v4-flash
|
||||
|
||||
|
||||
Reference in New Issue
Block a user