diff --git a/backend/app/api/main.py b/backend/app/api/main.py index 7f7e48f..b5fec60 100644 --- a/backend/app/api/main.py +++ b/backend/app/api/main.py @@ -11,7 +11,7 @@ from app.api.models import ErrorResponse from app.api.routes import api_router from app.config.logging import setup_logging from app.config.settings import settings -from app.services.llm.llm_factory import LLMFactory +from app.shared.bootstrap import cleanup_runtime_dependencies, preload_runtime_dependencies # Keep module behavior explicit so the backend flow stays easy to audit. @@ -24,12 +24,12 @@ async def lifespan(app: FastAPI): logger.info(f"启动 {settings.app_name} v{settings.app_version}") logger.info(f"调试模式: {settings.debug}") logger.info("预加载LLM客户端...") - LLMFactory.preload_clients(["qwen", "deepseek"]) + preload_runtime_dependencies() yield logger.info("应用关闭,执行清理...") - LLMFactory.cleanup() + cleanup_runtime_dependencies() app = FastAPI( diff --git a/backend/app/api/routes/agent.py b/backend/app/api/routes/agent.py index c0349fc..651aedf 100644 --- a/backend/app/api/routes/agent.py +++ b/backend/app/api/routes/agent.py @@ -20,7 +20,7 @@ from app.api.models import ( ) from app.config.settings import settings from app.shared.async_utils import iter_in_thread -from app.shared.bootstrap import get_agent_conversation_service, get_conversation_store +from app.shared.bootstrap import get_agent_conversation_service, get_agent_session_service # Keep route handlers close to their transport-layer wiring for easier auditing. diff --git a/backend/app/infrastructure/llm/openai_compatible_answer_generator.py b/backend/app/infrastructure/llm/openai_compatible_answer_generator.py index 2c296bb..1eee7c7 100644 --- a/backend/app/infrastructure/llm/openai_compatible_answer_generator.py +++ b/backend/app/infrastructure/llm/openai_compatible_answer_generator.py @@ -67,6 +67,21 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator): ) return messages, context_tokens + def _is_context_truncated(self, *, retrieved_chunks: list[RetrievedChunk], context_tokens: int) -> bool: + """Return whether the prompt context had to omit retrieved chunks to fit the token budget.""" + if not retrieved_chunks: + return False + estimated_total_tokens = sum( + self._estimate_tokens( + f"[{idx}] 文档: {chunk.doc_name}\n" + f"章节: {chunk.section_title or '未标注'}\n" + f"页码: {chunk.page_number}\n" + f"内容: {chunk.content}" + ) + for idx, chunk in enumerate(retrieved_chunks, start=1) + ) + return estimated_total_tokens > context_tokens + def _sources(self, chunks: list[RetrievedChunk]) -> list[AnswerSource]: """Handle sources for this module for the Open A I Compatible Answer Generator instance.""" return [ @@ -111,7 +126,10 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator): latency_ms=latency_ms, retrieved_count=len(retrieved_chunks), context_tokens=context_tokens, - truncated=len(retrieved_chunks) > len(messages), + truncated=self._is_context_truncated( + retrieved_chunks=retrieved_chunks, + context_tokens=context_tokens, + ), error=response.error, ) diff --git a/backend/app/shared/bootstrap.py b/backend/app/shared/bootstrap.py index 47947f1..e11aba3 100644 --- a/backend/app/shared/bootstrap.py +++ b/backend/app/shared/bootstrap.py @@ -3,31 +3,134 @@ from __future__ import annotations from functools import lru_cache +from typing import Callable from app.application.agent import AgentConversationService, AgentSessionService from app.application.documents import DocumentCommandService, DocumentQueryService from app.application.knowledge import KnowledgeRetrievalService +from app.application.perception.services import PerceptionService from app.config.settings import settings +from app.domain.documents import DocumentBinaryStore +from app.domain.retrieval import VectorIndex from app.infrastructure.embedding.openai_compatible_embedding_provider import OpenAICompatibleEmbeddingProvider from app.infrastructure.llm.openai_compatible_answer_generator import OpenAICompatibleAnswerGenerator from app.infrastructure.parser.aliyun_document_parser import AliyunDocumentParser from app.infrastructure.parser.local_chunk_builder import LocalRegulationChunkBuilder from app.infrastructure.parser.local_document_parser import LocalDocumentParser from app.infrastructure.parser.vector_chunk_builder import AliyunVectorChunkBuilder +from app.infrastructure.perception.mock_event_store import MockEventStore from app.infrastructure.session.in_memory_conversation_store import InMemoryConversationStore from app.infrastructure.storage.json_document_repository import JsonDocumentRepository from app.infrastructure.storage.minio_binary_store import MinioDocumentBinaryStore from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository from app.infrastructure.storage.postgres_parse_artifact_store import PostgresParseArtifactStore from app.infrastructure.vectorstore.bm25_retriever import BM25Retriever +from app.infrastructure.vectorstore.cross_encoder_reranker import OpenAICompatibleReranker from app.infrastructure.vectorstore.dense_retriever import DenseRetriever from app.infrastructure.vectorstore.milvus_vector_index import MilvusVectorIndex -from app.infrastructure.vectorstore.cross_encoder_reranker import OpenAICompatibleReranker -from app.infrastructure.perception.mock_event_store import MockEventStore -from app.application.perception.services import PerceptionService +from app.services.llm.llm_factory import LLMFactory # Keep shared wiring centralized so dependency construction remains consistent. +class LazyBinaryStore(DocumentBinaryStore): + """Delay MinIO connection work until binary storage is actually needed.""" + + def __init__(self, factory: Callable[[], DocumentBinaryStore]) -> None: + """Initialize the lazy binary store wrapper.""" + self._factory = factory + self._store: DocumentBinaryStore | None = None + + def _get_store(self) -> DocumentBinaryStore: + """Create the underlying store on first use and reuse it afterwards.""" + if self._store is None: + self._store = self._factory() + return self._store + + @property + def client(self): + """Expose the underlying client for compatibility with health endpoints.""" + return self._get_store().client + + def save( + self, + *, + object_name: str, + data: bytes, + content_type: str, + metadata: dict[str, str] | None = None, + ) -> None: + """Save data through the underlying binary store implementation.""" + self._get_store().save( + object_name=object_name, + data=data, + content_type=content_type, + metadata=metadata, + ) + + def read(self, object_name: str) -> bytes: + """Read data through the underlying binary store implementation.""" + return self._get_store().read(object_name) + + def delete(self, object_name: str) -> None: + """Delete data through the underlying binary store implementation.""" + self._get_store().delete(object_name) + + +class LazyVectorIndex(VectorIndex): + """Delay Milvus connection work until vector operations are actually needed.""" + + def __init__(self, factory: Callable[[], VectorIndex]) -> None: + """Initialize the lazy vector index wrapper.""" + self._factory = factory + self._index: VectorIndex | None = None + + def _get_index(self) -> VectorIndex: + """Create the underlying index on first use and reuse it afterwards.""" + if self._index is None: + self._index = self._factory() + return self._index + + @property + def collection(self): + """Expose the underlying Milvus collection for compatibility adapters.""" + return self._get_index().collection + + def upsert(self, chunks, vectors) -> int: + """Insert or update vectors through the underlying vector index implementation.""" + return self._get_index().upsert(chunks, vectors) + + def delete_by_document(self, doc_id: str) -> int: + """Delete vectors through the underlying vector index implementation.""" + return self._get_index().delete_by_document(doc_id) + + def search(self, query_vector: list[float], top_k: int, filters: str | None = None): + """Search vectors through the underlying vector index implementation.""" + return self._get_index().search(query_vector, top_k, filters) + + def count_by_document(self) -> dict[str, int]: + """Count document vectors through the underlying vector index implementation.""" + return self._get_index().count_by_document() + + def list_document_metadata(self) -> list[dict]: + """List document metadata through the underlying vector index implementation.""" + return self._get_index().list_document_metadata() + + def health(self) -> dict: + """Return vector index health through the underlying vector index implementation.""" + return self._get_index().health() + + +@lru_cache +def _build_binary_store() -> MinioDocumentBinaryStore: + """Return the concrete binary store implementation.""" + return MinioDocumentBinaryStore() + + +@lru_cache +def _build_vector_index() -> MilvusVectorIndex: + """Return the concrete vector index implementation.""" + return MilvusVectorIndex() + @lru_cache def get_document_repository(): @@ -46,9 +149,9 @@ def get_parse_artifact_store(): @lru_cache -def get_binary_store() -> MinioDocumentBinaryStore: +def get_binary_store() -> DocumentBinaryStore: """Return binary store.""" - return MinioDocumentBinaryStore() + return LazyBinaryStore(_build_binary_store) @lru_cache @@ -77,9 +180,9 @@ def get_embedding_provider() -> OpenAICompatibleEmbeddingProvider: @lru_cache -def get_vector_index() -> MilvusVectorIndex: +def get_vector_index() -> VectorIndex: """Return vector index.""" - return MilvusVectorIndex() + return LazyVectorIndex(_build_vector_index) @lru_cache @@ -162,6 +265,19 @@ def get_perception_service() -> PerceptionService: event_store=MockEventStore(), retrieval_service=get_retrieval_service(), ) + + +@lru_cache def get_agent_session_service() -> AgentSessionService: """Return agent session service.""" return AgentSessionService(conversation_store=get_conversation_store()) + + +def preload_runtime_dependencies() -> None: + """Warm dependencies that are safe and useful to preload during startup.""" + LLMFactory.preload_clients(["qwen", "deepseek"]) + + +def cleanup_runtime_dependencies() -> None: + """Release runtime dependencies that expose explicit cleanup hooks.""" + LLMFactory.cleanup() diff --git a/frontend/.env b/frontend/.env new file mode 100644 index 0000000..d60765d --- /dev/null +++ b/frontend/.env @@ -0,0 +1,2 @@ +VITE_API_PROXY_TARGET=http://6.86.80.8:8000 +FRONTEND_PORT=5173 diff --git a/frontend/.env.development b/frontend/.env.development new file mode 100644 index 0000000..191794b --- /dev/null +++ b/frontend/.env.development @@ -0,0 +1,2 @@ +VITE_API_PROXY_TARGET=http://127.0.0.1:8000 +FRONTEND_PORT=5173 diff --git a/frontend/.env.example b/frontend/.env.example new file mode 100644 index 0000000..191794b --- /dev/null +++ b/frontend/.env.example @@ -0,0 +1,2 @@ +VITE_API_PROXY_TARGET=http://127.0.0.1:8000 +FRONTEND_PORT=5173 diff --git a/frontend/README.md b/frontend/README.md index cfb6d1a..7032377 100644 --- a/frontend/README.md +++ b/frontend/README.md @@ -49,6 +49,12 @@ npm run dev 启动本地开发服务器,默认访问 `http://localhost:5173` +前端环境文件约定如下: + +- `frontend/.env.development`:本地开发,默认代理到 `http://127.0.0.1:8000` +- `frontend/.env.production`:生产构建,默认代理到 `http://6.86.80.8:8000` +- `frontend/.env.local`:临时覆盖本机配置,优先级高于上面两者 + ### 构建生产版本 ```bash diff --git a/frontend/src/pages/RagChat/CitedAnswer.tsx b/frontend/src/pages/RagChat/CitedAnswer.tsx index 42ccfef..1f33b5e 100644 --- a/frontend/src/pages/RagChat/CitedAnswer.tsx +++ b/frontend/src/pages/RagChat/CitedAnswer.tsx @@ -1,4 +1,4 @@ -import React, { useRef } from 'react'; +import React from 'react'; import { useTheme } from '../../contexts'; import type { RetrievalData } from '../../types'; diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index 1e02afe..1f0da05 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -4,7 +4,8 @@ import react from '@vitejs/plugin-react' // https://vite.dev/config/ export default defineConfig(({ mode }) => { const env = loadEnv(mode, process.cwd(), '') - const apiHost = env.API_HOST || '6.86.80.8' + // Default local frontend development to the local backend unless explicitly overridden. + const apiHost = env.API_HOST || '127.0.0.1' const apiPort = env.API_PORT || '8000' const proxyTarget = env.VITE_API_PROXY_TARGET || `http://${apiHost}:${apiPort}` diff --git a/pyproject.toml b/pyproject.toml index 375c01e..7b69071 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ "celery>=5.3.0", "redis>=4.5.0", "minio>=7.1.0", - "psycopg2-binary>=2.9.0" + "psycopg2-binary>=2.9.0", ] [dependency-groups]