feat: Migrate document parsing to Aliyun and update embedding configurations
- Updated LocalDocumentParser to include raw_layouts and artifact_prefix from settings.
- Added new documents with failure reasons and metadata to documents.json for better error tracking.
- Created a new documentation file detailing the Aliyun ingest implementation process.
- Updated RFC to reflect changes in the parsing backend and embedding dimensions.
- Modified tests to accommodate the new embedding dimension of 1024 and updated parser and chunk builder assertions.
- Verified migration configurations to ensure correct settings for embedding model and backend.
This commit is contained in:
18
.env
18
.env
@@ -9,7 +9,7 @@ DEBUG=false
|
||||
# ===== Milvus向量数据库配置(已有)=====
|
||||
MILVUS_HOST=localhost
|
||||
MILVUS_PORT=19530
|
||||
MILVUS_COLLECTION=regulations_dense_1536
|
||||
MILVUS_COLLECTION=regulations_dense_1024_v1
|
||||
MILVUS_DB_NAME=default
|
||||
|
||||
# ===== MinIO对象存储配置(已有)=====
|
||||
@@ -34,7 +34,7 @@ POSTGRES_DB=compliance_db
|
||||
|
||||
# ===== 嵌入模型配置 =====
|
||||
EMBEDDING_MODEL=text-embedding-v3
|
||||
EMBEDDING_DIM=1536
|
||||
EMBEDDING_DIM=1024
|
||||
EMBEDDING_API_KEY=sk-fVr9KmDZNC4pGDBQj0EUWz9bDmFzNxjYC9EzZpe2bVDsxtz8
|
||||
EMBEDDING_BASE_URL=http://6.86.80.4:30080/v1
|
||||
EMBEDDING_TIMEOUT_SECONDS=120
|
||||
@@ -59,7 +59,7 @@ LLM_TEMPERATURE=0.7
|
||||
# 获取API Key: https://dashscope.console.aliyun.com/
|
||||
QWEN_API_KEY=sk-fVr9KmDZNC4pGDBQj0EUWz9bDmFzNxjYC9EzZpe2bVDsxtz8
|
||||
QWEN_BASE_URL=http://6.86.80.4:30080/v1
|
||||
QWEN_MODEL=qwen3.5-plus
|
||||
QWEN_MODEL=qwen3.6-plus
|
||||
QWEN_VL_MODEL=qwen3-vl-plus
|
||||
|
||||
# ===== DeepSeek API配置 =====
|
||||
@@ -73,3 +73,15 @@ RAG_TOP_K=10
|
||||
RAG_MAX_CONTEXT_TOKENS=4000
|
||||
RAG_SUMMARY_MAX_TOKENS=1024
|
||||
RAG_SKILLS_MAX_TOKENS=2048
|
||||
|
||||
# ===== 阿里云文档解析 =====
|
||||
ALIBABA_ACCESS_KEY_ID=LTAI5t9ZjvwSU9bKuMyiExrE
|
||||
ALIBABA_ACCESS_KEY_SECRET=hNvY6XocmEO6inYlrmiBwBcx5OfidL
|
||||
ALIBABA_ENDPOINT=docmind-api.cn-hangzhou.aliyuncs.com
|
||||
ALIYUN_PARSE_POLL_INTERVAL_SECONDS=5
|
||||
ALIYUN_PARSE_TIMEOUT_SECONDS=900
|
||||
ALIYUN_PARSE_LAYOUT_STEP_SIZE=50
|
||||
ALIYUN_LLM_ENHANCEMENT=true
|
||||
ALIYUN_ENHANCEMENT_MODE=VLM
|
||||
DOCUMENT_PARSE_ARTIFACT_PREFIX=artifacts
|
||||
PARSER_FAILURE_MODE=fail
|
||||
|
||||
Reference in New Issue
Block a user