Refactor document handling and update Milvus collection settings

- Removed multiple failed document entries from `documents.json`.
- Added a new document entry with updated metadata and changed the index name to `regulations_dense_1024_v2`.
- Updated architecture documentation to reflect changes in the Milvus collection name.
- Adjusted requirements by removing the `sqlalchemy` dependency.
- Modified test cases to align with the new document structure and naming conventions.
- Introduced a new test file for Milvus vector index runtime recovery and error handling.
- Updated assertions in various test files to ensure compatibility with the new schema.
This commit is contained in:
ash66
2026-05-26 20:21:31 +08:00
parent fec22a3a2c
commit 30c7bda389
42 changed files with 7482 additions and 569 deletions

View File

@@ -122,16 +122,17 @@ class FakeChunkBuilder:
Chunk(
chunk_id=f"{parsed_document.doc_id}-chunk-1",
doc_id=parsed_document.doc_id,
doc_name=parsed_document.doc_name,
content="法规正文",
doc_title=parsed_document.doc_name,
text="法规正文",
embedding_text="标准:测试\n章节:第一章\n\n法规正文",
section_title="第一章",
section_path=["第一章"],
page_number=1,
page_start=1,
page_end=1,
chunk_type="section_text",
regulation_type=regulation_type,
version=version,
semantic_id="semantic-1",
block_type="section_text",
metadata={"source": "aliyun_vector_chunk"},
)
]