feat: Migrate document parsing to Aliyun and update embedding configurations
- Updated LocalDocumentParser to include raw_layouts and artifact_prefix from settings.
- Added new documents with failure reasons and metadata to documents.json for better error tracking.
- Created a new documentation file detailing the Aliyun ingest implementation process.
- Updated RFC to reflect changes in the parsing backend and embedding dimensions.
- Modified tests to accommodate the new embedding dimension of 1024 and updated parser and chunk builder assertions.
- Verified migration configurations to ensure correct settings for embedding model and backend.
This commit is contained in:
@@ -64,11 +64,16 @@ def verify_migration_config() -> bool:
|
||||
|
||||
try:
|
||||
assert settings.embedding_model == "text-embedding-v3"
|
||||
assert settings.embedding_dim == 1536
|
||||
assert settings.milvus_collection == "regulations_dense_1536"
|
||||
assert settings.embedding_dim == 1024
|
||||
assert settings.milvus_collection == "regulations_dense_1024_v1"
|
||||
assert settings.parser_backend == "aliyun"
|
||||
assert settings.chunk_backend == "aliyun"
|
||||
logger.info(f"embedding_model={settings.embedding_model}")
|
||||
logger.info(f"embedding_base_url={settings.embedding_base_url}")
|
||||
logger.info(f"embedding_dim={settings.embedding_dim}")
|
||||
logger.info(f"milvus_collection={settings.milvus_collection}")
|
||||
logger.info(f"parser_backend={settings.parser_backend}")
|
||||
logger.info(f"chunk_backend={settings.chunk_backend}")
|
||||
logger.success("migration config ok")
|
||||
return True
|
||||
except Exception as exc:
|
||||
|
||||
Reference in New Issue
Block a user