Refactor document handling and update Milvus collection settings
- Removed multiple failed document entries from `documents.json`.
- Added a new document entry with updated metadata and changed the index name to `regulations_dense_1024_v2`.
- Updated the architecture documentation to reflect the change in the Milvus collection name.
- Adjusted requirements by removing the `sqlalchemy` dependency.
- Modified test cases to align with the new document structure and naming conventions.
- Introduced a new test file for Milvus vector index runtime recovery and error handling.
- Updated assertions in various test files to ensure compatibility with the new schema.
This commit is contained in:
@@ -45,10 +45,10 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator):
|
||||
context_tokens = 0
|
||||
for idx, chunk in enumerate(retrieved_chunks, start=1):
|
||||
block = (
|
||||
f"[{idx}] 文档: {chunk.doc_name}\n"
|
||||
f"[{idx}] 文档: {chunk.doc_title}\n"
|
||||
f"章节: {chunk.section_title or '未标注'}\n"
|
||||
f"页码: {chunk.page_number}\n"
|
||||
f"内容: {chunk.content}"
|
||||
f"页码: {chunk.page_start}" + (f"-{chunk.page_end}" if chunk.page_end and chunk.page_end != chunk.page_start else "") + "\n"
|
||||
f"内容: {chunk.text}"
|
||||
)
|
||||
block_tokens = self._estimate_tokens(block)
|
||||
if context_tokens + block_tokens > settings.rag_max_context_tokens:
|
||||
@@ -73,10 +73,10 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator):
|
||||
return False
|
||||
estimated_total_tokens = sum(
|
||||
self._estimate_tokens(
|
||||
f"[{idx}] 文档: {chunk.doc_name}\n"
|
||||
f"[{idx}] 文档: {chunk.doc_title}\n"
|
||||
f"章节: {chunk.section_title or '未标注'}\n"
|
||||
f"页码: {chunk.page_number}\n"
|
||||
f"内容: {chunk.content}"
|
||||
f"页码: {chunk.page_start}" + (f"-{chunk.page_end}" if chunk.page_end and chunk.page_end != chunk.page_start else "") + "\n"
|
||||
f"内容: {chunk.text}"
|
||||
)
|
||||
for idx, chunk in enumerate(retrieved_chunks, start=1)
|
||||
)
|
||||
@@ -87,12 +87,17 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator):
|
||||
return [
|
||||
AnswerSource(
|
||||
doc_id=chunk.doc_id,
|
||||
doc_name=chunk.doc_name,
|
||||
doc_title=chunk.doc_title,
|
||||
chunk_id=chunk.chunk_id,
|
||||
chunk_type=chunk.chunk_type,
|
||||
section_title=chunk.section_title,
|
||||
page_number=chunk.page_number,
|
||||
page_start=chunk.page_start,
|
||||
page_end=chunk.page_end,
|
||||
section_level=chunk.section_level,
|
||||
chunk_index=chunk.chunk_index,
|
||||
piece_index=chunk.piece_index,
|
||||
score=chunk.score,
|
||||
content=chunk.content,
|
||||
text=chunk.text,
|
||||
metadata=chunk.metadata,
|
||||
)
|
||||
for chunk in chunks
|
||||
|
||||
Reference in New Issue
Block a user