Refactor document handling and update Milvus collection settings
- Removed multiple failed document entries from `documents.json`.
- Added a new document entry with updated metadata and changed the index name to `regulations_dense_1024_v2`.
- Updated the architecture documentation to reflect the new Milvus collection name.
- Adjusted the requirements by removing the sqlalchemy dependency.
- Modified test cases to align with the new document structure and naming conventions.
- Introduced a new test file covering Milvus vector index runtime recovery and error handling.
- Updated assertions in various test files to ensure compatibility with the new schema.
This commit is contained in:
131
backend/data/document_processing.json
Normal file
131
backend/data/document_processing.json
Normal file
@@ -0,0 +1,131 @@
|
||||
{
|
||||
"runs": {
|
||||
"8e722053-5009-40fe-a483-535b40ebbb16": {
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"trigger_type": "upload",
|
||||
"run_status": "succeeded",
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"chunk_backend": "aliyun",
|
||||
"embedding_model": "text-embedding-v3",
|
||||
"index_name": "regulations_dense_1024_v2",
|
||||
"started_at": "2026-05-26T12:18:27.208692+00:00",
|
||||
"stored_at": "2026-05-26T12:18:27.712855+00:00",
|
||||
"parsed_at": "2026-05-26T12:18:42.989238+00:00",
|
||||
"indexed_at": "2026-05-26T12:18:51.172418+00:00",
|
||||
"finished_at": "2026-05-26T12:18:51.172418+00:00",
|
||||
"layout_count": 48,
|
||||
"structure_node_count": 6,
|
||||
"semantic_block_count": 33,
|
||||
"vector_chunk_count": 34,
|
||||
"chunk_count": 34,
|
||||
"failure_stage": "",
|
||||
"error_message": "",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parse_task_id": "docmind-20260526-10b94713ccb348498b12180a5dcf32ff"
|
||||
}
|
||||
}
|
||||
},
|
||||
"status_events": {
|
||||
"d0532baf-0d65-4130-b282-ec51f04132fd": {
|
||||
"event_id": "d0532baf-0d65-4130-b282-ec51f04132fd",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"from_status": "",
|
||||
"to_status": "pending",
|
||||
"stage": "document_created",
|
||||
"message": "Document record created",
|
||||
"metadata": {},
|
||||
"occurred_at": "2026-05-26T12:18:27.235921+00:00"
|
||||
},
|
||||
"a5e32db5-25c3-4c73-a987-7311f0e72a31": {
|
||||
"event_id": "a5e32db5-25c3-4c73-a987-7311f0e72a31",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"from_status": "pending",
|
||||
"to_status": "stored",
|
||||
"stage": "store",
|
||||
"message": "Source file stored",
|
||||
"metadata": {},
|
||||
"occurred_at": "2026-05-26T12:18:27.741462+00:00"
|
||||
},
|
||||
"18e04ce7-9d7a-4008-8600-e2590100bd85": {
|
||||
"event_id": "18e04ce7-9d7a-4008-8600-e2590100bd85",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"from_status": "stored",
|
||||
"to_status": "parsed",
|
||||
"stage": "parse",
|
||||
"message": "Document parsed",
|
||||
"metadata": {
|
||||
"artifact_count": 4
|
||||
},
|
||||
"occurred_at": "2026-05-26T12:18:43.218026+00:00"
|
||||
},
|
||||
"d3b06025-5c91-4a42-9e5f-dce1c5312b96": {
|
||||
"event_id": "d3b06025-5c91-4a42-9e5f-dce1c5312b96",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"from_status": "parsed",
|
||||
"to_status": "indexed",
|
||||
"stage": "index",
|
||||
"message": "Document indexed",
|
||||
"metadata": {
|
||||
"chunk_count": 34,
|
||||
"index_name": "regulations_dense_1024_v2"
|
||||
},
|
||||
"occurred_at": "2026-05-26T12:18:51.195442+00:00"
|
||||
}
|
||||
},
|
||||
"artifacts": {
|
||||
"47fe2877-a8f5-4e1d-901b-80cd0194ba96": {
|
||||
"artifact_id": "47fe2877-a8f5-4e1d-901b-80cd0194ba96",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"artifact_type": "layouts",
|
||||
"object_name": "artifacts/7cbdfe3c/layouts.json",
|
||||
"content_type": "application/json",
|
||||
"byte_size": 0,
|
||||
"checksum": "",
|
||||
"metadata": {},
|
||||
"created_at": "2026-05-26T12:18:43.188467+00:00"
|
||||
},
|
||||
"44aa075b-86b2-48a7-9d14-a2453bd53863": {
|
||||
"artifact_id": "44aa075b-86b2-48a7-9d14-a2453bd53863",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"artifact_type": "structure_nodes",
|
||||
"object_name": "artifacts/7cbdfe3c/structure_nodes.json",
|
||||
"content_type": "application/json",
|
||||
"byte_size": 0,
|
||||
"checksum": "",
|
||||
"metadata": {},
|
||||
"created_at": "2026-05-26T12:18:43.188494+00:00"
|
||||
},
|
||||
"dedcc8fe-fa58-4de6-984d-f44332af5204": {
|
||||
"artifact_id": "dedcc8fe-fa58-4de6-984d-f44332af5204",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"artifact_type": "semantic_blocks",
|
||||
"object_name": "artifacts/7cbdfe3c/semantic_blocks.json",
|
||||
"content_type": "application/json",
|
||||
"byte_size": 0,
|
||||
"checksum": "",
|
||||
"metadata": {},
|
||||
"created_at": "2026-05-26T12:18:43.188511+00:00"
|
||||
},
|
||||
"9b0d8bda-e69e-4a4e-ae06-a308afe43109": {
|
||||
"artifact_id": "9b0d8bda-e69e-4a4e-ae06-a308afe43109",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"artifact_type": "vector_chunks",
|
||||
"object_name": "artifacts/7cbdfe3c/vector_chunks.json",
|
||||
"content_type": "application/json",
|
||||
"byte_size": 0,
|
||||
"checksum": "",
|
||||
"metadata": {},
|
||||
"created_at": "2026-05-26T12:18:43.188526+00:00"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,392 +1,9 @@
|
||||
{
|
||||
"69280841": {
|
||||
"doc_id": "69280841",
|
||||
"doc_name": "TCT算法接口.pdf",
|
||||
"file_name": "TCT算法接口.pdf",
|
||||
"object_name": "69280841/TCT算法接口.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 165557,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "local_markdown_parser",
|
||||
"index_name": "",
|
||||
"error_message": "embedding 维度不匹配,期望 1536",
|
||||
"created_at": "2026-05-18T07:12:16.668306+00:00",
|
||||
"updated_at": "2026-05-18T07:12:19.417142+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"structure_nodes": 0
|
||||
}
|
||||
},
|
||||
"44121fbb": {
|
||||
"doc_id": "44121fbb",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "44121fbb/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "",
|
||||
"index_name": "",
|
||||
"error_message": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a5cb9d0>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"created_at": "2026-05-18T09:53:47.996183+00:00",
|
||||
"updated_at": "2026-05-18T09:53:50.825868+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"failure_reason": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a5cb9d0>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"processing_stage": "failed"
|
||||
}
|
||||
},
|
||||
"77debb4a": {
|
||||
"doc_id": "77debb4a",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "77debb4a/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "",
|
||||
"index_name": "",
|
||||
"error_message": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a6dd480>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"created_at": "2026-05-18T10:05:46.104259+00:00",
|
||||
"updated_at": "2026-05-18T10:05:48.704061+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"failure_reason": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a6dd480>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"processing_stage": "failed"
|
||||
}
|
||||
},
|
||||
"d12bdcc8": {
|
||||
"doc_id": "d12bdcc8",
|
||||
"doc_name": "TCT算法接口.pdf",
|
||||
"file_name": "TCT算法接口.pdf",
|
||||
"object_name": "d12bdcc8/TCT算法接口.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 165557,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "",
|
||||
"index_name": "",
|
||||
"error_message": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a5bf570>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"created_at": "2026-05-18T10:07:22.199824+00:00",
|
||||
"updated_at": "2026-05-18T10:07:24.653751+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"failure_reason": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a5bf570>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"processing_stage": "failed"
|
||||
}
|
||||
},
|
||||
"3c2e8c9c": {
|
||||
"doc_id": "3c2e8c9c",
|
||||
"doc_name": "20260415_Continental tire mobile app solution.pdf",
|
||||
"file_name": "20260415_Continental tire mobile app solution.pdf",
|
||||
"object_name": "3c2e8c9c/20260415_Continental tire mobile app solution.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 2178074,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "",
|
||||
"index_name": "",
|
||||
"error_message": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a5bc8d0>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"created_at": "2026-05-18T10:09:58.338274+00:00",
|
||||
"updated_at": "2026-05-18T10:10:01.295502+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"failure_reason": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614a5bc8d0>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"processing_stage": "failed"
|
||||
}
|
||||
},
|
||||
"d22d21a0": {
|
||||
"doc_id": "d22d21a0",
|
||||
"doc_name": "20260415_Continental tire mobile app solution.pdf",
|
||||
"file_name": "20260415_Continental tire mobile app solution.pdf",
|
||||
"object_name": "d22d21a0/20260415_Continental tire mobile app solution.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 2178074,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "",
|
||||
"index_name": "",
|
||||
"error_message": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614b994160>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"created_at": "2026-05-18T10:12:20.078027+00:00",
|
||||
"updated_at": "2026-05-18T10:12:22.999843+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"failure_reason": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614b994160>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"processing_stage": "failed"
|
||||
}
|
||||
},
|
||||
"35f129d3": {
|
||||
"doc_id": "35f129d3",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "35f129d3/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "",
|
||||
"index_name": "",
|
||||
"error_message": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614b995370>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"created_at": "2026-05-18T10:13:24.706512+00:00",
|
||||
"updated_at": "2026-05-18T10:13:27.180509+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"failure_reason": "unable to load credentials from any of the providers in the chain: ['EnvironmentVariableCredentialsProvider: Environment variable accessKeyId cannot be empty', 'CLIProfileCredentialsProvider: unable to open credentials file: C:\\\\Users\\\\A200477427\\\\.aliyun/config.json', 'ProfileCredentialsProvider: failed to get credential from credentials file: $C:\\\\Users\\\\A200477427\\\\.alibabacloud/credentials.ini', \"EcsRamRoleCredentialsProvider: HTTPConnectionPool(host='100.100.100.200', port=80): Max retries exceeded with url: /latest/meta-data/ram/security-credentials/ (Caused by ConnectTimeoutError(<HTTPConnection(host='100.100.100.200', port=80) at 0x2614b995370>, 'Connection to 100.100.100.200 timed out. (connect timeout=1.0)'))\"]",
|
||||
"processing_stage": "failed"
|
||||
}
|
||||
},
|
||||
"efc21515": {
|
||||
"doc_id": "efc21515",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "efc21515/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "",
|
||||
"error_message": "Client error '400 Bad Request' for url 'http://6.86.80.4:30080/v1/embeddings'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400",
|
||||
"created_at": "2026-05-18T13:47:32.076786+00:00",
|
||||
"updated_at": "2026-05-18T13:47:57.998073+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260518-a6e84447457f43cb85f95225cfc6495b",
|
||||
"layout_count": 87,
|
||||
"structure_node_count": 20,
|
||||
"semantic_block_count": 27,
|
||||
"vector_chunk_count": 27,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/efc21515/layouts.json",
|
||||
"structure_nodes": "artifacts/efc21515/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/efc21515/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/efc21515/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "failed",
|
||||
"failure_reason": "Client error '400 Bad Request' for url 'http://6.86.80.4:30080/v1/embeddings'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400"
|
||||
}
|
||||
},
|
||||
"0d4b08bc": {
|
||||
"doc_id": "0d4b08bc",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "0d4b08bc/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "",
|
||||
"error_message": "Client error '404 Not Found' for url 'http://6.86.80.4:30080/v1/embeddings'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404",
|
||||
"created_at": "2026-05-18T14:03:15.134344+00:00",
|
||||
"updated_at": "2026-05-18T14:03:34.843448+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260518-78353d85daa24147b68d8fb71895179f",
|
||||
"layout_count": 87,
|
||||
"structure_node_count": 20,
|
||||
"semantic_block_count": 27,
|
||||
"vector_chunk_count": 27,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/0d4b08bc/layouts.json",
|
||||
"structure_nodes": "artifacts/0d4b08bc/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/0d4b08bc/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/0d4b08bc/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "failed",
|
||||
"failure_reason": "Client error '404 Not Found' for url 'http://6.86.80.4:30080/v1/embeddings'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404"
|
||||
}
|
||||
},
|
||||
"4302f314": {
|
||||
"doc_id": "4302f314",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "4302f314/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "",
|
||||
"error_message": "embedding 维度不匹配,期望 1536",
|
||||
"created_at": "2026-05-18T14:11:29.943973+00:00",
|
||||
"updated_at": "2026-05-18T14:11:48.554500+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260518-23935ee455ac4b26ac4201ac4781ee52",
|
||||
"layout_count": 87,
|
||||
"structure_node_count": 20,
|
||||
"semantic_block_count": 27,
|
||||
"vector_chunk_count": 27,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/4302f314/layouts.json",
|
||||
"structure_nodes": "artifacts/4302f314/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/4302f314/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/4302f314/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "failed",
|
||||
"failure_reason": "embedding 维度不匹配,期望 1536"
|
||||
}
|
||||
},
|
||||
"765ed1ee": {
|
||||
"doc_id": "765ed1ee",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "765ed1ee/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "",
|
||||
"error_message": "<MilvusException: (code=1100, message=the dim (1024) of field data(embedding) is not equal to schema dim (1536): invalid parameter[expected=1536][actual=1024])>",
|
||||
"created_at": "2026-05-18T14:18:28.875138+00:00",
|
||||
"updated_at": "2026-05-18T14:18:57.389110+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260518-f116856bc29245baa2531b245078a701",
|
||||
"layout_count": 87,
|
||||
"structure_node_count": 20,
|
||||
"semantic_block_count": 27,
|
||||
"vector_chunk_count": 27,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/765ed1ee/layouts.json",
|
||||
"structure_nodes": "artifacts/765ed1ee/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/765ed1ee/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/765ed1ee/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "failed",
|
||||
"failure_reason": "<MilvusException: (code=1100, message=the dim (1024) of field data(embedding) is not equal to schema dim (1536): invalid parameter[expected=1536][actual=1024])>"
|
||||
}
|
||||
},
|
||||
"05cabe09": {
|
||||
"doc_id": "05cabe09",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "05cabe09/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "failed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 0,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "",
|
||||
"error_message": "embedding 维度不匹配,期望 1536",
|
||||
"created_at": "2026-05-18T14:24:32.156500+00:00",
|
||||
"updated_at": "2026-05-18T14:24:50.114138+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260518-897d858983df48e28e9819e563d46208",
|
||||
"layout_count": 87,
|
||||
"structure_node_count": 20,
|
||||
"semantic_block_count": 27,
|
||||
"vector_chunk_count": 27,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/05cabe09/layouts.json",
|
||||
"structure_nodes": "artifacts/05cabe09/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/05cabe09/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/05cabe09/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "failed",
|
||||
"failure_reason": "embedding 维度不匹配,期望 1536"
|
||||
}
|
||||
},
|
||||
"9acb2ba0": {
|
||||
"doc_id": "9acb2ba0",
|
||||
"doc_name": "大众汽车手册.pdf",
|
||||
"file_name": "大众汽车手册.pdf",
|
||||
"object_name": "9acb2ba0/大众汽车手册.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size_bytes": 766565,
|
||||
"status": "indexed",
|
||||
"regulation_type": "",
|
||||
"version": "",
|
||||
"summary": "",
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 27,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "regulations_dense_1024_v1",
|
||||
"error_message": "",
|
||||
"created_at": "2026-05-18T14:29:01.368719+00:00",
|
||||
"updated_at": "2026-05-18T14:29:23.699068+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260518-e5fd4a5419e74d569c562e389e6ae72c",
|
||||
"layout_count": 87,
|
||||
"structure_node_count": 20,
|
||||
"semantic_block_count": 27,
|
||||
"vector_chunk_count": 27,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/9acb2ba0/layouts.json",
|
||||
"structure_nodes": "artifacts/9acb2ba0/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/9acb2ba0/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/9acb2ba0/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "indexed",
|
||||
"index_collection": "regulations_dense_1024_v1"
|
||||
}
|
||||
},
|
||||
"52bd970f": {
|
||||
"doc_id": "52bd970f",
|
||||
"7cbdfe3c": {
|
||||
"doc_id": "7cbdfe3c",
|
||||
"doc_name": "使用RSA Token连接CheckPoint VPN及PIN码设置_220.181.114.93 or 10.25.134.3.docx",
|
||||
"file_name": "使用RSA Token连接CheckPoint VPN及PIN码设置_220.181.114.93 or 10.25.134.3.docx",
|
||||
"object_name": "52bd970f/使用RSA Token连接CheckPoint VPN及PIN码设置_220.181.114.93 or 10.25.134.3.docx",
|
||||
"object_name": "7cbdfe3c/使用RSA Token连接CheckPoint VPN及PIN码设置_220.181.114.93 or 10.25.134.3.docx",
|
||||
"content_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"size_bytes": 1199920,
|
||||
"status": "indexed",
|
||||
@@ -396,26 +13,26 @@
|
||||
"summary_latency_ms": 0,
|
||||
"chunk_count": 34,
|
||||
"parser_name": "aliyun_docmind",
|
||||
"index_name": "regulations_dense_1024_v1",
|
||||
"index_name": "regulations_dense_1024_v2",
|
||||
"error_message": "",
|
||||
"created_at": "2026-05-25T07:45:12.777459+00:00",
|
||||
"updated_at": "2026-05-25T07:45:37.314290+00:00",
|
||||
"created_at": "2026-05-26T12:18:27.206125+00:00",
|
||||
"updated_at": "2026-05-26T12:18:51.171308+00:00",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"parse_task_id": "docmind-20260525-6d782dc33f2748a4a1020df765b8182d",
|
||||
"parse_task_id": "docmind-20260526-10b94713ccb348498b12180a5dcf32ff",
|
||||
"layout_count": 48,
|
||||
"structure_node_count": 6,
|
||||
"semantic_block_count": 33,
|
||||
"vector_chunk_count": 34,
|
||||
"artifact_keys": {
|
||||
"layouts": "artifacts/52bd970f/layouts.json",
|
||||
"structure_nodes": "artifacts/52bd970f/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/52bd970f/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/52bd970f/vector_chunks.json"
|
||||
"layouts": "artifacts/7cbdfe3c/layouts.json",
|
||||
"structure_nodes": "artifacts/7cbdfe3c/structure_nodes.json",
|
||||
"semantic_blocks": "artifacts/7cbdfe3c/semantic_blocks.json",
|
||||
"vector_chunks": "artifacts/7cbdfe3c/vector_chunks.json"
|
||||
},
|
||||
"processing_stage": "indexed",
|
||||
"index_collection": "regulations_dense_1024_v1"
|
||||
"index_collection": "regulations_dense_1024_v2"
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user