Refactor document handling and update Milvus collection settings

- Removed multiple failed document entries from `documents.json`.
- Added a new document entry with updated metadata and changed the index name to `regulations_dense_1024_v2`.
- Updated architecture documentation to reflect changes in the Milvus collection name.
- Removed the `sqlalchemy` dependency from the requirements.
- Modified test cases to align with the new document structure and naming conventions.
- Introduced a new test file for Milvus vector index runtime recovery and error handling.
- Updated assertions in various test files to ensure compatibility with the new schema.
This commit is contained in:
ash66
2026-05-26 20:21:31 +08:00
parent fec22a3a2c
commit 30c7bda389
42 changed files with 7482 additions and 569 deletions

View File

@@ -18,11 +18,11 @@ class FakeRetriever:
RetrievedChunk(
chunk_id="chunk-1",
doc_id="doc-1",
doc_name="测试法规",
content="法规正文",
doc_title="测试法规",
text="法规正文",
score=0.91,
section_title="第一章",
page_number=1,
page_start=1,
metadata={"section_title": "第一章"},
)
]
@@ -47,12 +47,12 @@ class FakeAnswerGenerator:
sources=[
AnswerSource(
doc_id=item.doc_id,
doc_name=item.doc_name,
doc_title=item.doc_title,
chunk_id=item.chunk_id,
section_title=item.section_title,
page_number=item.page_number,
page_start=item.page_start,
score=item.score,
content=item.content,
text=item.text,
metadata=item.metadata,
)
for item in retrieved_chunks