Refactor document handling and update Milvus collection settings

- Removed multiple failed document entries from `documents.json`.
- Added a new document entry with updated metadata and changed the index name to `regulations_dense_1024_v2`.
- Updated architecture documentation to reflect changes in the Milvus collection name.
- Removed the `sqlalchemy` dependency from the requirements.
- Modified test cases to align with the new document structure and naming conventions.
- Introduced a new test file covering runtime recovery and error handling for the Milvus vector index.
- Updated assertions in various test files to ensure compatibility with the new schema.
This commit is contained in:
ash66
2026-05-26 20:21:31 +08:00
parent fec22a3a2c
commit 30c7bda389
42 changed files with 7482 additions and 569 deletions

View File

@@ -29,11 +29,16 @@ def _reciprocal_rank_fusion(
RetrievedChunk(
chunk_id=chunk_map[ck].chunk_id,
doc_id=chunk_map[ck].doc_id,
doc_name=chunk_map[ck].doc_name,
content=chunk_map[ck].content,
doc_title=chunk_map[ck].doc_title,
text=chunk_map[ck].text,
score=scores[ck],
chunk_type=chunk_map[ck].chunk_type,
section_title=chunk_map[ck].section_title,
page_number=chunk_map[ck].page_number,
page_start=chunk_map[ck].page_start,
page_end=chunk_map[ck].page_end,
section_level=chunk_map[ck].section_level,
chunk_index=chunk_map[ck].chunk_index,
piece_index=chunk_map[ck].piece_index,
metadata=chunk_map[ck].metadata,
)
for ck in sorted_keys