feat: Migrate document parsing to Aliyun and update embedding configurations
- Updated LocalDocumentParser to include raw_layouts and artifact_prefix from settings.
- Added new documents with failure reasons and metadata to documents.json for better error tracking.
- Created a new documentation file detailing the Aliyun ingest implementation process.
- Updated RFC to reflect changes in the parsing backend and embedding dimensions.
- Modified tests to accommodate the new embedding dimension of 1024 and updated parser and chunk builder assertions.
- Verified migration configurations to ensure correct settings for embedding model and backend.
This commit is contained in:
@@ -27,12 +27,12 @@ class Settings(BaseSettings):
|
||||
# Milvus
|
||||
milvus_host: str = "localhost"
|
||||
milvus_port: int = 19530
|
||||
milvus_collection: str = "regulations_dense_1536"
|
||||
milvus_collection: str = "regulations_dense_1024_v1"
|
||||
|
||||
# LLM / embedding defaults aligned with the migrated backend path.
|
||||
llm_model: str = "qwen-max"
|
||||
embedding_model: str = "text-embedding-v3"
|
||||
embedding_dim: int = 1536
|
||||
embedding_dim: int = 1024
|
||||
|
||||
# Legacy workflow compatibility only.
|
||||
vector_top_k: int = 10
|
||||
@@ -47,7 +47,7 @@ class Settings(BaseSettings):
|
||||
api_port: int = 8000
|
||||
|
||||
# Legacy aliases retained for old utility modules.
|
||||
regulations_collection: str = "regulations_dense_1536"
|
||||
regulations_collection: str = "regulations_dense_1024_v1"
|
||||
compliance_collection: str = "compliance_cache"
|
||||
|
||||
# Preserve the legacy module API while keeping env resolution centralized at the repo root.
|
||||
|
||||
Reference in New Issue
Block a user