Refactor document handling and update Milvus collection settings

- Removed multiple failed document entries from `documents.json`.
- Added a new document entry with updated metadata and changed the index name to `regulations_dense_1024_v2`.
- Updated architecture documentation to reflect changes in the Milvus collection name.
- Adjusted requirements by removing the `sqlalchemy` dependency.
- Modified test cases to align with new document structure and naming conventions.
- Introduced a new test file for Milvus vector index runtime recovery and error handling.
- Updated assertions in various test files to ensure compatibility with the new schema.
This commit is contained in:
ash66
2026-05-26 20:21:31 +08:00
parent fec22a3a2c
commit 30c7bda389
42 changed files with 7482 additions and 569 deletions

View File

@@ -71,9 +71,9 @@ class PerceptionService:
affected_docs.append(
{
"doc_id": chunk.doc_id,
-"doc_name": chunk.doc_name,
+"doc_title": chunk.doc_title,
"score": round(float(chunk.score), 4),
-"snippet": (chunk.content or "")[:180],
+"snippet": (chunk.text or "")[:180],
"clause": getattr(chunk, "section_title", "") or "",
}
)
@@ -84,7 +84,7 @@ class PerceptionService:
# --- 2. Build context from retrieved chunks ---
context_parts = [
-f"[文档{i}: {c.doc_name}]\n{(c.content or '')[:400]}"
+f"[文档{i}: {c.doc_title}]\n{(c.text or '')[:400]}"
for i, c in enumerate(chunks[:5], 1)
]
context = "\n\n".join(context_parts) if context_parts else "(知识库中暂无相关文档)"