feat: Migrate document parsing to Aliyun and update embedding configurations
- Updated LocalDocumentParser to include raw_layouts and artifact_prefix from settings. - Added new documents with failure reasons and metadata to documents.json for better error tracking. - Created a new documentation file detailing the Aliyun ingest implementation process. - Updated RFC to reflect changes in the parsing backend and embedding dimensions. - Modified tests to accommodate the new embedding dimension of 1024 and updated parser and chunk builder assertions. - Verified migration configurations to ensure correct settings for embedding model and backend.
This commit is contained in:
@@ -21,5 +21,365 @@
|
||||
"generate_summary": true,
|
||||
"structure_nodes": 0
|
||||
}
|
||||
},
|
||||
"44121fbb": {
|
||||
"doc_id": "44121fbb",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "44121fbb/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "",
|
||||
"index_name": "",
|
||||
"error_message": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a5cb9d0>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"created_at": "2026-05-18T09:53:47.996183+00:00",
|
||||
"updated_at": "2026-05-18T09:53:50.825868+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"failure_reason": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a5cb9d0>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"processing_stage": "failed"
|
||||
}
|
||||
},
|
||||
"77debb4a": {
|
||||
"doc_id": "77debb4a",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "77debb4a/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "",
|
||||
"index_name": "",
|
||||
"error_message": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a6dd480>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"created_at": "2026-05-18T10:05:46.104259+00:00",
|
||||
"updated_at": "2026-05-18T10:05:48.704061+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"failure_reason": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a6dd480>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"processing_stage": "failed"
|
||||
}
|
||||
},
|
||||
"d12bdcc8": {
|
||||
"doc_id": "d12bdcc8",
|
||||
"doc_name": "TCT算法接口.pdf",
|
||||
"file_name": "TCT算法接口.pdf",
|
||||
"object_name": "d12bdcc8/TCT算法接口.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 165557,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "",
|
||||
"index_name": "",
|
||||
"error_message": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a5bf570>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"created_at": "2026-05-18T10:07:22.199824+00:00",
|
||||
"updated_at": "2026-05-18T10:07:24.653751+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"failure_reason": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a5bf570>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"processing_stage": "failed"
|
||||
}
|
||||
},
|
||||
"3c2e8c9c": {
|
||||
"doc_id": "3c2e8c9c",
|
||||
"doc_name": "20260415_Continental tire mobile app solution.pdf",
|
||||
"file_name": "20260415_Continental tire mobile app solution.pdf",
|
||||
"object_name": "3c2e8c9c/20260415_Continental tire mobile app solution.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 2178074,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "",
|
||||
"index_name": "",
|
||||
"error_message": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a5bc8d0>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"created_at": "2026-05-18T10:09:58.338274+00:00",
|
||||
"updated_at": "2026-05-18T10:10:01.295502+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"failure_reason": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a5bc8d0>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"processing_stage": "failed"
|
||||
}
|
||||
},
|
||||
"d22d21a0": {
|
||||
"doc_id": "d22d21a0",
|
||||
"doc_name": "20260415_Continental tire mobile app solution.pdf",
|
||||
"file_name": "20260415_Continental tire mobile app solution.pdf",
|
||||
"object_name": "d22d21a0/20260415_Continental tire mobile app solution.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 2178074,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "",
|
||||
"index_name": "",
|
||||
"error_message": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614b994160>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"created_at": "2026-05-18T10:12:20.078027+00:00",
|
||||
"updated_at": "2026-05-18T10:12:22.999843+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"failure_reason": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614b994160>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"processing_stage": "failed"
|
||||
}
|
||||
},
|
||||
"35f129d3": {
|
||||
"doc_id": "35f129d3",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "35f129d3/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "",
|
||||
"index_name": "",
|
||||
"error_message": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614b995370>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"created_at": "2026-05-18T10:13:24.706512+00:00",
|
||||
"updated_at": "2026-05-18T10:13:27.180509+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"failure_reason": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614b995370>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"processing_stage": "failed"
|
||||
}
|
||||
},
|
||||
"efc21515": {
|
||||
"doc_id": "efc21515",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "efc21515/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "",
|
||||
"error_message": "Client error '400 Bad Request' for url 'http://6.86.80.4:30080/v1/embeddings'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400",
|
||||
"created_at": "2026-05-18T13:47:32.076786+00:00",
|
||||
"updated_at": "2026-05-18T13:47:57.998073+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260518-a6e84447457f43cb85f95225cfc6495b",
|
||||
"layout_count": 87,
|
||||
"structure_node_count": 20,
|
||||
"semantic_block_count": 27,
|
||||
"vector_chunk_count": 27,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/efc21515/layouts.json",
|
||||
"structure_nodes": "artifacts/efc21515/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/efc21515/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/efc21515/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "failed",
|
||||
"failure_reason": "Client error '400 Bad Request' for url 'http://6.86.80.4:30080/v1/embeddings'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400"
|
||||
}
|
||||
},
|
||||
"0d4b08bc": {
|
||||
"doc_id": "0d4b08bc",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "0d4b08bc/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "",
|
||||
"error_message": "Client error '404 Not Found' for url 'http://6.86.80.4:30080/v1/embeddings'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404",
|
||||
"created_at": "2026-05-18T14:03:15.134344+00:00",
|
||||
"updated_at": "2026-05-18T14:03:34.843448+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260518-78353d85daa24147b68d8fb71895179f",
|
||||
"layout_count": 87,
|
||||
"structure_node_count": 20,
|
||||
"semantic_block_count": 27,
|
||||
"vector_chunk_count": 27,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/0d4b08bc/layouts.json",
|
||||
"structure_nodes": "artifacts/0d4b08bc/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/0d4b08bc/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/0d4b08bc/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "failed",
|
||||
"failure_reason": "Client error '404 Not Found' for url 'http://6.86.80.4:30080/v1/embeddings'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404"
|
||||
}
|
||||
},
|
||||
"4302f314": {
|
||||
"doc_id": "4302f314",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "4302f314/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "",
|
||||
"error_message": "embedding 维度不匹配,期望 1536",
|
||||
"created_at": "2026-05-18T14:11:29.943973+00:00",
|
||||
"updated_at": "2026-05-18T14:11:48.554500+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260518-23935ee455ac4b26ac4201ac4781ee52",
|
||||
"layout_count": 87,
|
||||
"structure_node_count": 20,
|
||||
"semantic_block_count": 27,
|
||||
"vector_chunk_count": 27,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/4302f314/layouts.json",
|
||||
"structure_nodes": "artifacts/4302f314/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/4302f314/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/4302f314/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "failed",
|
||||
"failure_reason": "embedding 维度不匹配,期望 1536"
|
||||
}
|
||||
},
|
||||
"765ed1ee": {
|
||||
"doc_id": "765ed1ee",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "765ed1ee/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "",
|
||||
"error_message": "<MilvusException: (code=1100, message=the dim (1024) of field data(embedding) is not equal to schema dim (1536): invalid parameter[expected=1536][actual=1024])>",
|
||||
"created_at": "2026-05-18T14:18:28.875138+00:00",
|
||||
"updated_at": "2026-05-18T14:18:57.389110+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260518-f116856bc29245baa2531b245078a701",
|
||||
"layout_count": 87,
|
||||
"structure_node_count": 20,
|
||||
"semantic_block_count": 27,
|
||||
"vector_chunk_count": 27,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/765ed1ee/layouts.json",
|
||||
"structure_nodes": "artifacts/765ed1ee/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/765ed1ee/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/765ed1ee/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "failed",
|
||||
"failure_reason": "<MilvusException: (code=1100, message=the dim (1024) of field data(embedding) is not equal to schema dim (1536): invalid parameter[expected=1536][actual=1024])>"
|
||||
}
|
||||
},
|
||||
"05cabe09": {
|
||||
"doc_id": "05cabe09",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "05cabe09/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "",
|
||||
"error_message": "embedding 维度不匹配,期望 1536",
|
||||
"created_at": "2026-05-18T14:24:32.156500+00:00",
|
||||
"updated_at": "2026-05-18T14:24:50.114138+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260518-897d858983df48e28e9819e563d46208",
|
||||
"layout_count": 87,
|
||||
"structure_node_count": 20,
|
||||
"semantic_block_count": 27,
|
||||
"vector_chunk_count": 27,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/05cabe09/layouts.json",
|
||||
"structure_nodes": "artifacts/05cabe09/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/05cabe09/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/05cabe09/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "failed",
|
||||
"failure_reason": "embedding 维度不匹配,期望 1536"
|
||||
}
|
||||
},
|
||||
"9acb2ba0": {
|
||||
"doc_id": "9acb2ba0",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "9acb2ba0/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "indexed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 27,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "regulations_dense_1024_v1",
|
||||
"error_message": "",
|
||||
"created_at": "2026-05-18T14:29:01.368719+00:00",
|
||||
"updated_at": "2026-05-18T14:29:23.699068+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260518-e5fd4a5419e74d569c562e389e6ae72c",
|
||||
"layout_count": 87,
|
||||
"structure_node_count": 20,
|
||||
"semantic_block_count": 27,
|
||||
"vector_chunk_count": 27,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/9acb2ba0/layouts.json",
|
||||
"structure_nodes": "artifacts/9acb2ba0/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/9acb2ba0/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/9acb2ba0/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "indexed",
|
||||
"index_collection": "regulations_dense_1024_v1"
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user