feat(bootstrap): refactor runtime dependency management and add lazy loading for binary store and vector index
feat(agent): update import for agent session service
feat(openai): add context truncation check in OpenAI answer generator
docs(README): update frontend environment file conventions
fix(vite): default local frontend development to local backend
This commit is contained in:
@@ -11,7 +11,7 @@ from app.api.models import ErrorResponse
|
|||||||
from app.api.routes import api_router
|
from app.api.routes import api_router
|
||||||
from app.config.logging import setup_logging
|
from app.config.logging import setup_logging
|
||||||
from app.config.settings import settings
|
from app.config.settings import settings
|
||||||
from app.services.llm.llm_factory import LLMFactory
|
from app.shared.bootstrap import cleanup_runtime_dependencies, preload_runtime_dependencies
|
||||||
# Keep module behavior explicit so the backend flow stays easy to audit.
|
# Keep module behavior explicit so the backend flow stays easy to audit.
|
||||||
|
|
||||||
|
|
||||||
@@ -24,12 +24,12 @@ async def lifespan(app: FastAPI):
|
|||||||
logger.info(f"启动 {settings.app_name} v{settings.app_version}")
|
logger.info(f"启动 {settings.app_name} v{settings.app_version}")
|
||||||
logger.info(f"调试模式: {settings.debug}")
|
logger.info(f"调试模式: {settings.debug}")
|
||||||
logger.info("预加载LLM客户端...")
|
logger.info("预加载LLM客户端...")
|
||||||
LLMFactory.preload_clients(["qwen", "deepseek"])
|
preload_runtime_dependencies()
|
||||||
|
|
||||||
yield
|
yield
|
||||||
|
|
||||||
logger.info("应用关闭,执行清理...")
|
logger.info("应用关闭,执行清理...")
|
||||||
LLMFactory.cleanup()
|
cleanup_runtime_dependencies()
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI(
|
app = FastAPI(
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ from app.api.models import (
|
|||||||
)
|
)
|
||||||
from app.config.settings import settings
|
from app.config.settings import settings
|
||||||
from app.shared.async_utils import iter_in_thread
|
from app.shared.async_utils import iter_in_thread
|
||||||
from app.shared.bootstrap import get_agent_conversation_service, get_conversation_store
|
from app.shared.bootstrap import get_agent_conversation_service, get_agent_session_service
|
||||||
# Keep route handlers close to their transport-layer wiring for easier auditing.
|
# Keep route handlers close to their transport-layer wiring for easier auditing.
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -67,6 +67,21 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator):
|
|||||||
)
|
)
|
||||||
return messages, context_tokens
|
return messages, context_tokens
|
||||||
|
|
||||||
|
def _is_context_truncated(self, *, retrieved_chunks: list[RetrievedChunk], context_tokens: int) -> bool:
|
||||||
|
"""Return whether the prompt context had to omit retrieved chunks to fit the token budget."""
|
||||||
|
if not retrieved_chunks:
|
||||||
|
return False
|
||||||
|
estimated_total_tokens = sum(
|
||||||
|
self._estimate_tokens(
|
||||||
|
f"[{idx}] 文档: {chunk.doc_name}\n"
|
||||||
|
f"章节: {chunk.section_title or '未标注'}\n"
|
||||||
|
f"页码: {chunk.page_number}\n"
|
||||||
|
f"内容: {chunk.content}"
|
||||||
|
)
|
||||||
|
for idx, chunk in enumerate(retrieved_chunks, start=1)
|
||||||
|
)
|
||||||
|
return estimated_total_tokens > context_tokens
|
||||||
|
|
||||||
def _sources(self, chunks: list[RetrievedChunk]) -> list[AnswerSource]:
|
def _sources(self, chunks: list[RetrievedChunk]) -> list[AnswerSource]:
|
||||||
"""Handle sources for this module for the Open A I Compatible Answer Generator instance."""
|
"""Handle sources for this module for the Open A I Compatible Answer Generator instance."""
|
||||||
return [
|
return [
|
||||||
@@ -111,7 +126,10 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator):
|
|||||||
latency_ms=latency_ms,
|
latency_ms=latency_ms,
|
||||||
retrieved_count=len(retrieved_chunks),
|
retrieved_count=len(retrieved_chunks),
|
||||||
context_tokens=context_tokens,
|
context_tokens=context_tokens,
|
||||||
truncated=len(retrieved_chunks) > len(messages),
|
truncated=self._is_context_truncated(
|
||||||
|
retrieved_chunks=retrieved_chunks,
|
||||||
|
context_tokens=context_tokens,
|
||||||
|
),
|
||||||
error=response.error,
|
error=response.error,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -3,31 +3,134 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
from app.application.agent import AgentConversationService, AgentSessionService
|
from app.application.agent import AgentConversationService, AgentSessionService
|
||||||
from app.application.documents import DocumentCommandService, DocumentQueryService
|
from app.application.documents import DocumentCommandService, DocumentQueryService
|
||||||
from app.application.knowledge import KnowledgeRetrievalService
|
from app.application.knowledge import KnowledgeRetrievalService
|
||||||
|
from app.application.perception.services import PerceptionService
|
||||||
from app.config.settings import settings
|
from app.config.settings import settings
|
||||||
|
from app.domain.documents import DocumentBinaryStore
|
||||||
|
from app.domain.retrieval import VectorIndex
|
||||||
from app.infrastructure.embedding.openai_compatible_embedding_provider import OpenAICompatibleEmbeddingProvider
|
from app.infrastructure.embedding.openai_compatible_embedding_provider import OpenAICompatibleEmbeddingProvider
|
||||||
from app.infrastructure.llm.openai_compatible_answer_generator import OpenAICompatibleAnswerGenerator
|
from app.infrastructure.llm.openai_compatible_answer_generator import OpenAICompatibleAnswerGenerator
|
||||||
from app.infrastructure.parser.aliyun_document_parser import AliyunDocumentParser
|
from app.infrastructure.parser.aliyun_document_parser import AliyunDocumentParser
|
||||||
from app.infrastructure.parser.local_chunk_builder import LocalRegulationChunkBuilder
|
from app.infrastructure.parser.local_chunk_builder import LocalRegulationChunkBuilder
|
||||||
from app.infrastructure.parser.local_document_parser import LocalDocumentParser
|
from app.infrastructure.parser.local_document_parser import LocalDocumentParser
|
||||||
from app.infrastructure.parser.vector_chunk_builder import AliyunVectorChunkBuilder
|
from app.infrastructure.parser.vector_chunk_builder import AliyunVectorChunkBuilder
|
||||||
|
from app.infrastructure.perception.mock_event_store import MockEventStore
|
||||||
from app.infrastructure.session.in_memory_conversation_store import InMemoryConversationStore
|
from app.infrastructure.session.in_memory_conversation_store import InMemoryConversationStore
|
||||||
from app.infrastructure.storage.json_document_repository import JsonDocumentRepository
|
from app.infrastructure.storage.json_document_repository import JsonDocumentRepository
|
||||||
from app.infrastructure.storage.minio_binary_store import MinioDocumentBinaryStore
|
from app.infrastructure.storage.minio_binary_store import MinioDocumentBinaryStore
|
||||||
from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository
|
from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository
|
||||||
from app.infrastructure.storage.postgres_parse_artifact_store import PostgresParseArtifactStore
|
from app.infrastructure.storage.postgres_parse_artifact_store import PostgresParseArtifactStore
|
||||||
from app.infrastructure.vectorstore.bm25_retriever import BM25Retriever
|
from app.infrastructure.vectorstore.bm25_retriever import BM25Retriever
|
||||||
|
from app.infrastructure.vectorstore.cross_encoder_reranker import OpenAICompatibleReranker
|
||||||
from app.infrastructure.vectorstore.dense_retriever import DenseRetriever
|
from app.infrastructure.vectorstore.dense_retriever import DenseRetriever
|
||||||
from app.infrastructure.vectorstore.milvus_vector_index import MilvusVectorIndex
|
from app.infrastructure.vectorstore.milvus_vector_index import MilvusVectorIndex
|
||||||
from app.infrastructure.vectorstore.cross_encoder_reranker import OpenAICompatibleReranker
|
from app.services.llm.llm_factory import LLMFactory
|
||||||
from app.infrastructure.perception.mock_event_store import MockEventStore
|
|
||||||
from app.application.perception.services import PerceptionService
|
|
||||||
# Keep shared wiring centralized so dependency construction remains consistent.
|
# Keep shared wiring centralized so dependency construction remains consistent.
|
||||||
|
|
||||||
|
|
||||||
|
class LazyBinaryStore(DocumentBinaryStore):
    """Binary store proxy that builds the real store only when it is first needed.

    The concrete store comes from the ``factory`` callable supplied at
    construction time. It is created on the first operation that requires it
    and reused for every later operation.
    """

    def __init__(self, factory: Callable[[], DocumentBinaryStore]) -> None:
        """Remember ``factory``; no store is constructed at this point."""
        self._store: DocumentBinaryStore | None = None
        self._factory = factory

    def _get_store(self) -> DocumentBinaryStore:
        """Return the wrapped store, invoking the factory on the first call only."""
        store = self._store
        if store is None:
            store = self._factory()
            self._store = store
        return store

    @property
    def client(self):
        """Return the ``client`` attribute of the wrapped store (building it if needed)."""
        backend = self._get_store()
        return backend.client

    def save(
        self,
        *,
        object_name: str,
        data: bytes,
        content_type: str,
        metadata: dict[str, str] | None = None,
    ) -> None:
        """Forward a save request, argument for argument, to the wrapped store."""
        backend = self._get_store()
        backend.save(
            object_name=object_name,
            data=data,
            content_type=content_type,
            metadata=metadata,
        )

    def read(self, object_name: str) -> bytes:
        """Return whatever the wrapped store's ``read`` yields for ``object_name``."""
        backend = self._get_store()
        return backend.read(object_name)

    def delete(self, object_name: str) -> None:
        """Forward the deletion of ``object_name`` to the wrapped store."""
        backend = self._get_store()
        backend.delete(object_name)
|
||||||
|
|
||||||
|
|
||||||
|
class LazyVectorIndex(VectorIndex):
    """Vector index proxy that builds the real index only when it is first needed.

    The concrete index comes from the ``factory`` callable supplied at
    construction time. It is created on the first operation that requires it
    and reused for every later operation.
    """

    def __init__(self, factory: Callable[[], VectorIndex]) -> None:
        """Remember ``factory``; no index is constructed at this point."""
        self._index: VectorIndex | None = None
        self._factory = factory

    def _get_index(self) -> VectorIndex:
        """Return the wrapped index, invoking the factory on the first call only."""
        index = self._index
        if index is None:
            index = self._factory()
            self._index = index
        return index

    @property
    def collection(self):
        """Return the ``collection`` attribute of the wrapped index (building it if needed)."""
        backend = self._get_index()
        return backend.collection

    def upsert(self, chunks, vectors) -> int:
        """Forward ``chunks`` and ``vectors`` to the wrapped index's ``upsert`` and return its result."""
        backend = self._get_index()
        return backend.upsert(chunks, vectors)

    def delete_by_document(self, doc_id: str) -> int:
        """Forward ``doc_id`` to the wrapped index's ``delete_by_document`` and return its result."""
        backend = self._get_index()
        return backend.delete_by_document(doc_id)

    def search(self, query_vector: list[float], top_k: int, filters: str | None = None):
        """Forward the query positionally to the wrapped index's ``search`` and return its result."""
        backend = self._get_index()
        return backend.search(query_vector, top_k, filters)

    def count_by_document(self) -> dict[str, int]:
        """Return the wrapped index's ``count_by_document`` result."""
        backend = self._get_index()
        return backend.count_by_document()

    def list_document_metadata(self) -> list[dict]:
        """Return the wrapped index's ``list_document_metadata`` result."""
        backend = self._get_index()
        return backend.list_document_metadata()

    def health(self) -> dict:
        """Return the wrapped index's ``health`` result."""
        backend = self._get_index()
        return backend.health()
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache
def _build_binary_store() -> MinioDocumentBinaryStore:
    """Construct the MinIO-backed binary store.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    instance built on the first call is returned again on later calls.
    """
    binary_store = MinioDocumentBinaryStore()
    return binary_store
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache
def _build_vector_index() -> MilvusVectorIndex:
    """Construct the Milvus-backed vector index.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    instance built on the first call is returned again on later calls.
    """
    vector_index = MilvusVectorIndex()
    return vector_index
|
||||||
|
|
||||||
|
|
||||||
@lru_cache
|
@lru_cache
|
||||||
def get_document_repository():
|
def get_document_repository():
|
||||||
@@ -46,9 +149,9 @@ def get_parse_artifact_store():
|
|||||||
|
|
||||||
|
|
||||||
@lru_cache
|
@lru_cache
|
||||||
def get_binary_store() -> MinioDocumentBinaryStore:
|
def get_binary_store() -> DocumentBinaryStore:
|
||||||
"""Return binary store."""
|
"""Return binary store."""
|
||||||
return MinioDocumentBinaryStore()
|
return LazyBinaryStore(_build_binary_store)
|
||||||
|
|
||||||
|
|
||||||
@lru_cache
|
@lru_cache
|
||||||
@@ -77,9 +180,9 @@ def get_embedding_provider() -> OpenAICompatibleEmbeddingProvider:
|
|||||||
|
|
||||||
|
|
||||||
@lru_cache
|
@lru_cache
|
||||||
def get_vector_index() -> MilvusVectorIndex:
|
def get_vector_index() -> VectorIndex:
|
||||||
"""Return vector index."""
|
"""Return vector index."""
|
||||||
return MilvusVectorIndex()
|
return LazyVectorIndex(_build_vector_index)
|
||||||
|
|
||||||
|
|
||||||
@lru_cache
|
@lru_cache
|
||||||
@@ -162,6 +265,19 @@ def get_perception_service() -> PerceptionService:
|
|||||||
event_store=MockEventStore(),
|
event_store=MockEventStore(),
|
||||||
retrieval_service=get_retrieval_service(),
|
retrieval_service=get_retrieval_service(),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache
|
||||||
def get_agent_session_service() -> AgentSessionService:
|
def get_agent_session_service() -> AgentSessionService:
|
||||||
"""Return agent session service."""
|
"""Return agent session service."""
|
||||||
return AgentSessionService(conversation_store=get_conversation_store())
|
return AgentSessionService(conversation_store=get_conversation_store())
|
||||||
|
|
||||||
|
|
||||||
|
def preload_runtime_dependencies() -> None:
    """Preload the LLM clients that should be ready before requests arrive.

    Delegates to ``LLMFactory.preload_clients`` with the ``qwen`` and
    ``deepseek`` client names.
    """
    client_names = ["qwen", "deepseek"]
    LLMFactory.preload_clients(client_names)
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup_runtime_dependencies() -> None:
    """Run shutdown cleanup for runtime dependencies.

    Currently this only delegates to ``LLMFactory.cleanup``.
    """
    LLMFactory.cleanup()
|
||||||
|
|||||||
2
frontend/.env
Normal file
2
frontend/.env
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
VITE_API_PROXY_TARGET=http://6.86.80.8:8000
|
||||||
|
FRONTEND_PORT=5173
|
||||||
2
frontend/.env.development
Normal file
2
frontend/.env.development
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
VITE_API_PROXY_TARGET=http://127.0.0.1:8000
|
||||||
|
FRONTEND_PORT=5173
|
||||||
2
frontend/.env.example
Normal file
2
frontend/.env.example
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
VITE_API_PROXY_TARGET=http://127.0.0.1:8000
|
||||||
|
FRONTEND_PORT=5173
|
||||||
@@ -49,6 +49,12 @@ npm run dev
|
|||||||
|
|
||||||
启动本地开发服务器,默认访问 `http://localhost:5173`
|
启动本地开发服务器,默认访问 `http://localhost:5173`
|
||||||
|
|
||||||
|
前端环境文件约定如下:
|
||||||
|
|
||||||
|
- `frontend/.env.development`:本地开发,默认代理到 `http://127.0.0.1:8000`
|
||||||
|
- `frontend/.env.production`:生产构建,默认代理到 `http://6.86.80.8:8000`
|
||||||
|
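- `frontend/.env.local`:临时覆盖本机配置,会覆盖 `frontend/.env` 中的同名变量;但按 Vite 的加载规则,特定模式的文件(如 `.env.development`、`.env.production`)优先级高于通用文件,如需覆盖它们请使用 `frontend/.env.development.local` 等 `.env.[mode].local` 文件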
|
||||||
|
|
||||||
### 构建生产版本
|
### 构建生产版本
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import React, { useRef } from 'react';
|
import React from 'react';
|
||||||
import { useTheme } from '../../contexts';
|
import { useTheme } from '../../contexts';
|
||||||
import type { RetrievalData } from '../../types';
|
import type { RetrievalData } from '../../types';
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,8 @@ import react from '@vitejs/plugin-react'
|
|||||||
// https://vite.dev/config/
|
// https://vite.dev/config/
|
||||||
export default defineConfig(({ mode }) => {
|
export default defineConfig(({ mode }) => {
|
||||||
const env = loadEnv(mode, process.cwd(), '')
|
const env = loadEnv(mode, process.cwd(), '')
|
||||||
const apiHost = env.API_HOST || '6.86.80.8'
|
// Default local frontend development to the local backend unless explicitly overridden.
|
||||||
|
const apiHost = env.API_HOST || '127.0.0.1'
|
||||||
const apiPort = env.API_PORT || '8000'
|
const apiPort = env.API_PORT || '8000'
|
||||||
const proxyTarget = env.VITE_API_PROXY_TARGET || `http://${apiHost}:${apiPort}`
|
const proxyTarget = env.VITE_API_PROXY_TARGET || `http://${apiHost}:${apiPort}`
|
||||||
|
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ dependencies = [
|
|||||||
"celery>=5.3.0",
|
"celery>=5.3.0",
|
||||||
"redis>=4.5.0",
|
"redis>=4.5.0",
|
||||||
"minio>=7.1.0",
|
"minio>=7.1.0",
|
||||||
"psycopg2-binary>=2.9.0"
|
"psycopg2-binary>=2.9.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
|
|||||||
Reference in New Issue
Block a user