Refactor document handling and update Milvus collection settings

- Removed multiple failed document entries from `documents.json`.
- Added a new document entry with updated metadata and changed the index name to `regulations_dense_1024_v2`.
- Updated architecture documentation to reflect changes in the Milvus collection name.
- Adjusted requirements by removing the sqlalchemy dependency.
- Modified test cases to align with new document structure and naming conventions.
- Introduced a new test file for Milvus vector index runtime recovery and error handling.
- Updated assertions in various test files to ensure compatibility with the new schema.
This commit is contained in:
ash66
2026-05-26 20:21:31 +08:00
parent fec22a3a2c
commit 30c7bda389
42 changed files with 7482 additions and 569 deletions

View File

@@ -3,6 +3,7 @@
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request
from fastapi.encoders import jsonable_encoder
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from loguru import logger
@@ -12,6 +13,7 @@ from app.api.routes import api_router
from app.config.logging import setup_logging
from app.config.settings import settings
from app.shared.bootstrap import cleanup_runtime_dependencies, preload_runtime_dependencies
from app.shared.errors import VectorStoreSchemaError
# Keep module behavior explicit so the backend flow stays easy to audit.
@@ -55,16 +57,33 @@ app.add_middleware(
app.include_router(api_router, prefix="/api/v1")
@app.exception_handler(VectorStoreSchemaError)
async def vector_store_schema_exception_handler(request: Request, exc: VectorStoreSchemaError):
    """Turn a VectorStoreSchemaError into a fixed-shape HTTP 500 JSON reply.

    The exception is logged, then wrapped in an ``ErrorResponse`` whose
    ``error`` field is the literal ``"VectorStoreSchemaError"`` and whose
    ``message`` field is ``str(exc)``.

    Args:
        request: The incoming request; not read by this handler.
        exc: The schema/runtime error raised while handling the request.

    Returns:
        A ``JSONResponse`` with status code 500 carrying the encoded error.
    """
    logger.error(f"向量库 schema 异常: {exc}")
    error_payload = ErrorResponse(
        error="VectorStoreSchemaError",
        message=str(exc),
    )
    # Encode the model to JSON-compatible data before handing it to the response.
    encoded_body = jsonable_encoder(error_payload)
    return JSONResponse(status_code=500, content=encoded_body)
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
"""Global exception handler."""
logger.error(f"未处理的异常: {exc}")
return JSONResponse(
status_code=500,
content=ErrorResponse(
error="InternalServerError",
message=str(exc),
).model_dump(),
content=jsonable_encoder(
ErrorResponse(
error="InternalServerError",
message=str(exc),
)
),
)

View File

@@ -7,6 +7,7 @@ from .knowledge import router as knowledge_router
from .agent import router as agent_router
from .status import router as status_router
from .perception import router as perception_router
from .rag import router as rag_router
# Keep package boundaries explicit so backend imports stay predictable.
@@ -20,6 +21,7 @@ api_router.include_router(agent_router)
api_router.include_router(compliance_router)
api_router.include_router(status_router)
api_router.include_router(perception_router)
api_router.include_router(rag_router)
__all__ = [
"api_router",
@@ -29,4 +31,5 @@ __all__ = [
"compliance_router",
"status_router",
"perception_router",
"rag_router",
]

View File

@@ -29,14 +29,19 @@ async def search_knowledge(request: SearchRequest):
results=[
SearchResultItem(
id=index + 1,
content=item.content,
content=item.text,
score=item.score,
metadata={
"doc_id": item.doc_id,
"doc_name": item.doc_name,
"doc_title": item.doc_title,
"chunk_id": item.chunk_id,
"chunk_type": item.chunk_type,
"section_title": item.section_title,
"page_number": item.page_number,
"page_start": item.page_start,
"page_end": item.page_end,
"section_level": item.section_level,
"chunk_index": item.chunk_index,
"piece_index": item.piece_index,
**item.metadata,
},
)

View File

@@ -50,8 +50,8 @@ async def rag_chat(request: RagChatRequest):
{
"id": str(s.get("chunk_id") or s.get("doc_id") or idx + 1),
"score": s.get("score", 0),
"preview": s.get("content", "")[:200],
"doc_name": s.get("doc_name", ""),
"preview": s.get("text", s.get("content", ""))[:200],
"doc_name": s.get("doc_title", s.get("doc_name", "")),
"clause": s.get("section_title", "法规片段"),
"doc_id": s.get("doc_id"),
"download_url": (