Refactor document handling and update Milvus collection settings
- Removed multiple failed document entries from `documents.json`. - Added a new document entry with updated metadata and changed the index name to `regulations_dense_1024_v2`. - Updated architecture documentation to reflect changes in the Milvus collection name. - Adjusted requirements by removing the sqlalchemy dependency. - Modified test cases to align with new document structure and naming conventions. - Introduced a new test file for Milvus vector index runtime recovery and error handling. - Updated assertions in various test files to ensure compatibility with the new schema.
This commit is contained in:
131
backend/data/document_processing.json
Normal file
131
backend/data/document_processing.json
Normal file
@@ -0,0 +1,131 @@
|
||||
{
|
||||
"runs": {
|
||||
"8e722053-5009-40fe-a483-535b40ebbb16": {
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"trigger_type": "upload",
|
||||
"run_status": "succeeded",
|
||||
"parser_backend": "aliyun_docmind",
|
||||
"chunk_backend": "aliyun",
|
||||
"embedding_model": "text-embedding-v3",
|
||||
"index_name": "regulations_dense_1024_v2",
|
||||
"started_at": "2026-05-26T12:18:27.208692+00:00",
|
||||
"stored_at": "2026-05-26T12:18:27.712855+00:00",
|
||||
"parsed_at": "2026-05-26T12:18:42.989238+00:00",
|
||||
"indexed_at": "2026-05-26T12:18:51.172418+00:00",
|
||||
"finished_at": "2026-05-26T12:18:51.172418+00:00",
|
||||
"layout_count": 48,
|
||||
"structure_node_count": 6,
|
||||
"semantic_block_count": 33,
|
||||
"vector_chunk_count": 34,
|
||||
"chunk_count": 34,
|
||||
"failure_stage": "",
|
||||
"error_message": "",
|
||||
"metadata": {
|
||||
"generate_summary": true,
|
||||
"parse_task_id": "docmind-20260526-10b94713ccb348498b12180a5dcf32ff"
|
||||
}
|
||||
}
|
||||
},
|
||||
"status_events": {
|
||||
"d0532baf-0d65-4130-b282-ec51f04132fd": {
|
||||
"event_id": "d0532baf-0d65-4130-b282-ec51f04132fd",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"from_status": "",
|
||||
"to_status": "pending",
|
||||
"stage": "document_created",
|
||||
"message": "Document record created",
|
||||
"metadata": {},
|
||||
"occurred_at": "2026-05-26T12:18:27.235921+00:00"
|
||||
},
|
||||
"a5e32db5-25c3-4c73-a987-7311f0e72a31": {
|
||||
"event_id": "a5e32db5-25c3-4c73-a987-7311f0e72a31",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"from_status": "pending",
|
||||
"to_status": "stored",
|
||||
"stage": "store",
|
||||
"message": "Source file stored",
|
||||
"metadata": {},
|
||||
"occurred_at": "2026-05-26T12:18:27.741462+00:00"
|
||||
},
|
||||
"18e04ce7-9d7a-4008-8600-e2590100bd85": {
|
||||
"event_id": "18e04ce7-9d7a-4008-8600-e2590100bd85",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"from_status": "stored",
|
||||
"to_status": "parsed",
|
||||
"stage": "parse",
|
||||
"message": "Document parsed",
|
||||
"metadata": {
|
||||
"artifact_count": 4
|
||||
},
|
||||
"occurred_at": "2026-05-26T12:18:43.218026+00:00"
|
||||
},
|
||||
"d3b06025-5c91-4a42-9e5f-dce1c5312b96": {
|
||||
"event_id": "d3b06025-5c91-4a42-9e5f-dce1c5312b96",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"from_status": "parsed",
|
||||
"to_status": "indexed",
|
||||
"stage": "index",
|
||||
"message": "Document indexed",
|
||||
"metadata": {
|
||||
"chunk_count": 34,
|
||||
"index_name": "regulations_dense_1024_v2"
|
||||
},
|
||||
"occurred_at": "2026-05-26T12:18:51.195442+00:00"
|
||||
}
|
||||
},
|
||||
"artifacts": {
|
||||
"47fe2877-a8f5-4e1d-901b-80cd0194ba96": {
|
||||
"artifact_id": "47fe2877-a8f5-4e1d-901b-80cd0194ba96",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"artifact_type": "layouts",
|
||||
"object_name": "artifacts/7cbdfe3c/layouts.json",
|
||||
"content_type": "application/json",
|
||||
"byte_size": 0,
|
||||
"checksum": "",
|
||||
"metadata": {},
|
||||
"created_at": "2026-05-26T12:18:43.188467+00:00"
|
||||
},
|
||||
"44aa075b-86b2-48a7-9d14-a2453bd53863": {
|
||||
"artifact_id": "44aa075b-86b2-48a7-9d14-a2453bd53863",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"artifact_type": "structure_nodes",
|
||||
"object_name": "artifacts/7cbdfe3c/structure_nodes.json",
|
||||
"content_type": "application/json",
|
||||
"byte_size": 0,
|
||||
"checksum": "",
|
||||
"metadata": {},
|
||||
"created_at": "2026-05-26T12:18:43.188494+00:00"
|
||||
},
|
||||
"dedcc8fe-fa58-4de6-984d-f44332af5204": {
|
||||
"artifact_id": "dedcc8fe-fa58-4de6-984d-f44332af5204",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"artifact_type": "semantic_blocks",
|
||||
"object_name": "artifacts/7cbdfe3c/semantic_blocks.json",
|
||||
"content_type": "application/json",
|
||||
"byte_size": 0,
|
||||
"checksum": "",
|
||||
"metadata": {},
|
||||
"created_at": "2026-05-26T12:18:43.188511+00:00"
|
||||
},
|
||||
"9b0d8bda-e69e-4a4e-ae06-a308afe43109": {
|
||||
"artifact_id": "9b0d8bda-e69e-4a4e-ae06-a308afe43109",
|
||||
"doc_id": "7cbdfe3c",
|
||||
"run_id": "8e722053-5009-40fe-a483-535b40ebbb16",
|
||||
"artifact_type": "vector_chunks",
|
||||
"object_name": "artifacts/7cbdfe3c/vector_chunks.json",
|
||||
"content_type": "application/json",
|
||||
"byte_size": 0,
|
||||
"checksum": "",
|
||||
"metadata": {},
|
||||
"created_at": "2026-05-26T12:18:43.188526+00:00"
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user