feat(bootstrap): refactor runtime dependency management and add lazy loading for binary store and vector index

feat(agent): update import for agent session service
feat(openai): add context truncation check in OpenAI answer generator
docs(README): update frontend environment file conventions
fix(vite): default local frontend development to local backend
2026-05-25 13:58:48 +08:00
parent 091a02c522
commit 10a034e294
11 changed files with 162 additions and 15 deletions
--- a/backend/app/api/main.py
+++ b/backend/app/api/main.py
@@ -11,7 +11,7 @@ from app.api.models import ErrorResponse
 from app.api.routes import api_router
 from app.config.logging import setup_logging
 from app.config.settings import settings
-from app.services.llm.llm_factory import LLMFactory
+from app.shared.bootstrap import cleanup_runtime_dependencies, preload_runtime_dependencies
 # Keep module behavior explicit so the backend flow stays easy to audit.
@@ -24,12 +24,12 @@ async def lifespan(app: FastAPI):
    logger.info(f"启动 {settings.app_name} v{settings.app_version}")
    logger.info(f"调试模式: {settings.debug}")
    logger.info("预加载LLM客户端...")
-    LLMFactory.preload_clients(["qwen", "deepseek"])
+    preload_runtime_dependencies()
    yield
    logger.info("应用关闭，执行清理...")
-    LLMFactory.cleanup()
+    cleanup_runtime_dependencies()
 app = FastAPI(
--- a/backend/app/api/routes/agent.py
+++ b/backend/app/api/routes/agent.py
@@ -20,7 +20,7 @@ from app.api.models import (
 )
 from app.config.settings import settings
 from app.shared.async_utils import iter_in_thread
-from app.shared.bootstrap import get_agent_conversation_service, get_conversation_store
+from app.shared.bootstrap import get_agent_conversation_service, get_agent_session_service
 # Keep route handlers close to their transport-layer wiring for easier auditing.
--- a/backend/app/infrastructure/llm/openai_compatible_answer_generator.py
+++ b/backend/app/infrastructure/llm/openai_compatible_answer_generator.py
@@ -67,6 +67,21 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator):
        )
        return messages, context_tokens
    def _is_context_truncated(self, *, retrieved_chunks: list[RetrievedChunk], context_tokens: int) -> bool:
        """Return whether the prompt context had to omit retrieved chunks to fit the token budget."""
        if not retrieved_chunks:
            return False
        estimated_total_tokens = sum(
            self._estimate_tokens(
                f"[{idx}] 文档: {chunk.doc_name}\n"
                f"章节: {chunk.section_title or '未标注'}\n"
                f"页码: {chunk.page_number}\n"
                f"内容: {chunk.content}"
            )
            for idx, chunk in enumerate(retrieved_chunks, start=1)
        )
        return estimated_total_tokens > context_tokens
    def _sources(self, chunks: list[RetrievedChunk]) -> list[AnswerSource]:
        """Handle sources for this module for the Open A I Compatible Answer Generator instance."""
        return [
@@ -111,7 +126,10 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator):
            latency_ms=latency_ms,
            retrieved_count=len(retrieved_chunks),
            context_tokens=context_tokens,
-            truncated=len(retrieved_chunks) > len(messages),
+            truncated=self._is_context_truncated(
                retrieved_chunks=retrieved_chunks,
                context_tokens=context_tokens,
            ),
            error=response.error,
        )
--- a/backend/app/shared/bootstrap.py
+++ b/backend/app/shared/bootstrap.py
@@ -3,31 +3,134 @@
 from __future__ import annotations
 from functools import lru_cache
 from typing import Callable
 from app.application.agent import AgentConversationService, AgentSessionService
 from app.application.documents import DocumentCommandService, DocumentQueryService
 from app.application.knowledge import KnowledgeRetrievalService
 from app.application.perception.services import PerceptionService
 from app.config.settings import settings
 from app.domain.documents import DocumentBinaryStore
 from app.domain.retrieval import VectorIndex
 from app.infrastructure.embedding.openai_compatible_embedding_provider import OpenAICompatibleEmbeddingProvider
 from app.infrastructure.llm.openai_compatible_answer_generator import OpenAICompatibleAnswerGenerator
 from app.infrastructure.parser.aliyun_document_parser import AliyunDocumentParser
 from app.infrastructure.parser.local_chunk_builder import LocalRegulationChunkBuilder
 from app.infrastructure.parser.local_document_parser import LocalDocumentParser
 from app.infrastructure.parser.vector_chunk_builder import AliyunVectorChunkBuilder
 from app.infrastructure.perception.mock_event_store import MockEventStore
 from app.infrastructure.session.in_memory_conversation_store import InMemoryConversationStore
 from app.infrastructure.storage.json_document_repository import JsonDocumentRepository
 from app.infrastructure.storage.minio_binary_store import MinioDocumentBinaryStore
 from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository
 from app.infrastructure.storage.postgres_parse_artifact_store import PostgresParseArtifactStore
 from app.infrastructure.vectorstore.bm25_retriever import BM25Retriever
 from app.infrastructure.vectorstore.cross_encoder_reranker import OpenAICompatibleReranker
 from app.infrastructure.vectorstore.dense_retriever import DenseRetriever
 from app.infrastructure.vectorstore.milvus_vector_index import MilvusVectorIndex
-from app.infrastructure.vectorstore.cross_encoder_reranker import OpenAICompatibleReranker
+from app.services.llm.llm_factory import LLMFactory
 from app.infrastructure.perception.mock_event_store import MockEventStore
 from app.application.perception.services import PerceptionService
 # Keep shared wiring centralized so dependency construction remains consistent.
 class LazyBinaryStore(DocumentBinaryStore):
    """Binary-store proxy that builds the real store only when it is first needed.

    The wrapper keeps a zero-argument factory and forwards ``client``, ``save``,
    ``read`` and ``delete`` to whatever store that factory returns. The store is
    created on first access and reused for every later call.
    """
    def __init__(self, factory: Callable[[], DocumentBinaryStore]) -> None:
        """Remember the factory; no store is created at construction time."""
        self._store: DocumentBinaryStore | None = None
        self._factory = factory
    def _get_store(self) -> DocumentBinaryStore:
        """Return the wrapped store, calling the factory if none is held yet."""
        store = self._store
        if store is None:
            store = self._factory()
            self._store = store
        return store
    @property
    def client(self):
        """Return the wrapped store's ``client`` attribute (creates the store if needed)."""
        backing = self._get_store()
        return backing.client
    def save(
        self,
        *,
        object_name: str,
        data: bytes,
        content_type: str,
        metadata: dict[str, str] | None = None,
    ) -> None:
        """Forward a save request, keyword for keyword, to the wrapped store."""
        backing = self._get_store()
        backing.save(
            object_name=object_name,
            data=data,
            content_type=content_type,
            metadata=metadata,
        )
    def read(self, object_name: str) -> bytes:
        """Return the bytes the wrapped store reads for ``object_name``."""
        backing = self._get_store()
        return backing.read(object_name)
    def delete(self, object_name: str) -> None:
        """Ask the wrapped store to delete ``object_name``."""
        backing = self._get_store()
        backing.delete(object_name)
 class LazyVectorIndex(VectorIndex):
    """Vector-index proxy that builds the real index only when it is first needed.

    The wrapper keeps a zero-argument factory and forwards every operation to
    the index that factory returns. The index is created on first access and
    reused for every later call.
    """
    def __init__(self, factory: Callable[[], VectorIndex]) -> None:
        """Remember the factory; no index is created at construction time."""
        self._index: VectorIndex | None = None
        self._factory = factory
    def _get_index(self) -> VectorIndex:
        """Return the wrapped index, calling the factory if none is held yet."""
        index = self._index
        if index is None:
            index = self._factory()
            self._index = index
        return index
    @property
    def collection(self):
        """Return the wrapped index's ``collection`` attribute (creates the index if needed)."""
        backing = self._get_index()
        return backing.collection
    def upsert(self, chunks, vectors) -> int:
        """Forward ``chunks`` and ``vectors`` positionally to the wrapped index's ``upsert``."""
        backing = self._get_index()
        return backing.upsert(chunks, vectors)
    def delete_by_document(self, doc_id: str) -> int:
        """Forward ``doc_id`` to the wrapped index's ``delete_by_document`` and return its result."""
        backing = self._get_index()
        return backing.delete_by_document(doc_id)
    def search(self, query_vector: list[float], top_k: int, filters: str | None = None):
        """Forward the query positionally to the wrapped index's ``search`` and return its result."""
        backing = self._get_index()
        return backing.search(query_vector, top_k, filters)
    def count_by_document(self) -> dict[str, int]:
        """Return whatever the wrapped index's ``count_by_document`` reports."""
        backing = self._get_index()
        return backing.count_by_document()
    def list_document_metadata(self) -> list[dict]:
        """Return whatever the wrapped index's ``list_document_metadata`` reports."""
        backing = self._get_index()
        return backing.list_document_metadata()
    def health(self) -> dict:
        """Return the wrapped index's ``health`` result (creates the index if needed)."""
        backing = self._get_index()
        return backing.health()
@lru_cache
 def _build_binary_store() -> MinioDocumentBinaryStore:
    """Construct the MinIO-backed binary store.

    Because this zero-argument function is wrapped in ``lru_cache``, the
    constructor runs on the first successful call only; later calls return
    that same instance.
    """
    return MinioDocumentBinaryStore()
@lru_cache
 def _build_vector_index() -> MilvusVectorIndex:
    """Construct the Milvus-backed vector index.

    Because this zero-argument function is wrapped in ``lru_cache``, the
    constructor runs on the first successful call only; later calls return
    that same instance.
    """
    return MilvusVectorIndex()
@lru_cache
 def get_document_repository():
@@ -46,9 +149,9 @@ def get_parse_artifact_store():
@lru_cache
-def get_binary_store() -> MinioDocumentBinaryStore:
+def get_binary_store() -> DocumentBinaryStore:
    """Return binary store."""
-    return MinioDocumentBinaryStore()
+    return LazyBinaryStore(_build_binary_store)
@lru_cache
@@ -77,9 +180,9 @@ def get_embedding_provider() -> OpenAICompatibleEmbeddingProvider:
@lru_cache
-def get_vector_index() -> MilvusVectorIndex:
+def get_vector_index() -> VectorIndex:
    """Return vector index."""
-    return MilvusVectorIndex()
+    return LazyVectorIndex(_build_vector_index)
@lru_cache
@@ -162,6 +265,19 @@ def get_perception_service() -> PerceptionService:
        event_store=MockEventStore(),
        retrieval_service=get_retrieval_service(),
    )
@lru_cache
 def get_agent_session_service() -> AgentSessionService:
    """Return the agent session service, built on the store from ``get_conversation_store()``.

    ``lru_cache`` on this zero-argument function means the service is built on
    the first call and the same instance is returned afterwards.
    """
    return AgentSessionService(conversation_store=get_conversation_store())
 def preload_runtime_dependencies() -> None:
    """Warm dependencies that are safe and useful to preload during startup.

    At present this only asks ``LLMFactory`` to preload its clients for the
    ``"qwen"`` and ``"deepseek"`` providers; nothing else is touched here.
    """
    # The provider list is fixed here rather than read from settings.
    LLMFactory.preload_clients(["qwen", "deepseek"])
 def cleanup_runtime_dependencies() -> None:
    """Release runtime dependencies that expose explicit cleanup hooks.

    At present the only hook invoked is ``LLMFactory.cleanup()``.
    """
    LLMFactory.cleanup()
--- a/frontend/.env
+++ b/frontend/.env
@@ -0,0 +1,2 @@
 VITE_API_PROXY_TARGET=http://6.86.80.8:8000
 FRONTEND_PORT=5173
--- a/frontend/.env.development
+++ b/frontend/.env.development
@@ -0,0 +1,2 @@
 VITE_API_PROXY_TARGET=http://127.0.0.1:8000
 FRONTEND_PORT=5173
--- a/frontend/.env.example
+++ b/frontend/.env.example
@@ -0,0 +1,2 @@
 VITE_API_PROXY_TARGET=http://127.0.0.1:8000
 FRONTEND_PORT=5173
--- a/frontend/README.md
+++ b/frontend/README.md
@@ -49,6 +49,12 @@ npm run dev
 启动本地开发服务器，默认访问 `http://localhost:5173`
 前端环境文件约定如下：
 - `frontend/.env.development`：本地开发，默认代理到 `http://127.0.0.1:8000`
 - `frontend/.env.production`：生产构建，默认代理到 `http://6.86.80.8:8000`
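 - `frontend/.env.development.local` / `frontend/.env.production.local`：临时覆盖本机配置，优先级高于对应模式的环境文件（注意：`frontend/.env.local` 中的同名变量会被 `.env.development` / `.env.production` 覆盖）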
 ### 构建生产版本
 ```bash
--- a/frontend/src/pages/RagChat/CitedAnswer.tsx
+++ b/frontend/src/pages/RagChat/CitedAnswer.tsx
@@ -1,4 +1,4 @@
-import React, { useRef } from 'react';
+import React from 'react';
 import { useTheme } from '../../contexts';
 import type { RetrievalData } from '../../types';
--- a/frontend/vite.config.ts
+++ b/frontend/vite.config.ts
@@ -4,7 +4,8 @@ import react from '@vitejs/plugin-react'
 // https://vite.dev/config/
 export default defineConfig(({ mode }) => {
  const env = loadEnv(mode, process.cwd(), '')
-  const apiHost = env.API_HOST || '6.86.80.8'
+  // Default local frontend development to the local backend unless explicitly overridden.
  const apiHost = env.API_HOST || '127.0.0.1'
  const apiPort = env.API_PORT || '8000'
  const proxyTarget = env.VITE_API_PROXY_TARGET || `http://${apiHost}:${apiPort}`
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,7 @@ dependencies = [
    "celery>=5.3.0",
    "redis>=4.5.0",
    "minio>=7.1.0",
-    "psycopg2-binary>=2.9.0"
+    "psycopg2-binary>=2.9.0",
 ]
 [dependency-groups]
		`@@ -0,0 +1,2 @@`
							`VITE_API_PROXY_TARGET=http://6.86.80.8:8000`
							`FRONTEND_PORT=5173`
		`@@ -0,0 +1,2 @@`
							`VITE_API_PROXY_TARGET=http://127.0.0.1:8000`
							`FRONTEND_PORT=5173`