"""Share backend wiring for bootstrap.""" from __future__ import annotations from functools import lru_cache from typing import Callable from app.application.agent import AgentConversationService, AgentSessionService from app.application.documents import DocumentCommandService, DocumentQueryService from app.application.knowledge import KnowledgeRetrievalService from app.application.perception.services import PerceptionService from app.config.settings import settings from app.domain.documents import DocumentBinaryStore from app.domain.retrieval import VectorIndex from app.infrastructure.embedding.openai_compatible_embedding_provider import OpenAICompatibleEmbeddingProvider from app.infrastructure.llm.openai_compatible_answer_generator import OpenAICompatibleAnswerGenerator from app.infrastructure.parser.aliyun_document_parser import AliyunDocumentParser from app.infrastructure.parser.local_chunk_builder import LocalRegulationChunkBuilder from app.infrastructure.parser.local_document_parser import LocalDocumentParser from app.infrastructure.parser.vector_chunk_builder import AliyunVectorChunkBuilder from app.infrastructure.perception.mock_event_store import MockEventStore from app.infrastructure.session.in_memory_conversation_store import InMemoryConversationStore from app.infrastructure.storage.json_document_repository import JsonDocumentRepository from app.infrastructure.storage.minio_binary_store import MinioDocumentBinaryStore from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository from app.infrastructure.storage.postgres_parse_artifact_store import PostgresParseArtifactStore from app.infrastructure.vectorstore.bm25_retriever import BM25Retriever from app.infrastructure.vectorstore.cross_encoder_reranker import OpenAICompatibleReranker from app.infrastructure.vectorstore.dense_retriever import DenseRetriever from app.infrastructure.vectorstore.milvus_vector_index import MilvusVectorIndex from app.services.llm.llm_factory import LLMFactory # Keep shared wiring centralized so dependency construction remains consistent. class LazyBinaryStore(DocumentBinaryStore): """Delay MinIO connection work until binary storage is actually needed.""" def __init__(self, factory: Callable[[], DocumentBinaryStore]) -> None: """Initialize the lazy binary store wrapper.""" self._factory = factory self._store: DocumentBinaryStore | None = None def _get_store(self) -> DocumentBinaryStore: """Create the underlying store on first use and reuse it afterwards.""" if self._store is None: self._store = self._factory() return self._store @property def client(self): """Expose the underlying client for compatibility with health endpoints.""" return self._get_store().client def save( self, *, object_name: str, data: bytes, content_type: str, metadata: dict[str, str] | None = None, ) -> None: """Save data through the underlying binary store implementation.""" self._get_store().save( object_name=object_name, data=data, content_type=content_type, metadata=metadata, ) def read(self, object_name: str) -> bytes: """Read data through the underlying binary store implementation.""" return self._get_store().read(object_name) def delete(self, object_name: str) -> None: """Delete data through the underlying binary store implementation.""" self._get_store().delete(object_name) class LazyVectorIndex(VectorIndex): """Delay Milvus connection work until vector operations are actually needed.""" def __init__(self, factory: Callable[[], VectorIndex]) -> None: """Initialize the lazy vector index wrapper.""" self._factory = factory self._index: VectorIndex | None = None def _get_index(self) -> VectorIndex: """Create the underlying index on first use and reuse it afterwards.""" if self._index is None: self._index = self._factory() return self._index @property def collection(self): """Expose the underlying Milvus collection for compatibility adapters.""" return self._get_index().collection def upsert(self, chunks, vectors) -> int: """Insert or update vectors through the underlying vector index implementation.""" return self._get_index().upsert(chunks, vectors) def delete_by_document(self, doc_id: str) -> int: """Delete vectors through the underlying vector index implementation.""" return self._get_index().delete_by_document(doc_id) def search(self, query_vector: list[float], top_k: int, filters: str | None = None): """Search vectors through the underlying vector index implementation.""" return self._get_index().search(query_vector, top_k, filters) def count_by_document(self) -> dict[str, int]: """Count document vectors through the underlying vector index implementation.""" return self._get_index().count_by_document() def list_document_metadata(self) -> list[dict]: """List document metadata through the underlying vector index implementation.""" return self._get_index().list_document_metadata() def health(self) -> dict: """Return vector index health through the underlying vector index implementation.""" return self._get_index().health() @lru_cache def _build_binary_store() -> MinioDocumentBinaryStore: """Return the concrete binary store implementation.""" return MinioDocumentBinaryStore() @lru_cache def _build_vector_index() -> MilvusVectorIndex: """Return the concrete vector index implementation.""" return MilvusVectorIndex() @lru_cache def get_document_repository(): """Return document repository (json or postgres, controlled by settings).""" if settings.document_repository_backend == "postgres": return PostgresDocumentRepository() return JsonDocumentRepository(settings.document_metadata_path) @lru_cache def get_parse_artifact_store(): """Return parse artifact store, or None when postgres backend is not enabled.""" if settings.document_repository_backend == "postgres": return PostgresParseArtifactStore() return None @lru_cache def get_binary_store() -> DocumentBinaryStore: """Return binary store.""" return LazyBinaryStore(_build_binary_store) @lru_cache def get_parser(): """Return parser.""" if settings.parser_backend == "aliyun": return AliyunDocumentParser() return LocalDocumentParser() @lru_cache def get_chunk_builder(): """Return chunk builder.""" if settings.chunk_backend == "aliyun": return AliyunVectorChunkBuilder() return LocalRegulationChunkBuilder( chunk_size=settings.chunk_size, chunk_overlap=settings.chunk_overlap, ) @lru_cache def get_embedding_provider() -> OpenAICompatibleEmbeddingProvider: """Return embedding provider.""" return OpenAICompatibleEmbeddingProvider() @lru_cache def get_vector_index() -> VectorIndex: """Return vector index.""" return LazyVectorIndex(_build_vector_index) @lru_cache def get_reranker(): """Return reranker if enabled, else None.""" if settings.reranker_enabled and settings.reranker_base_url: return OpenAICompatibleReranker() return None @lru_cache def get_bm25_retriever() -> BM25Retriever | None: """Return BM25 retriever if rank_bm25 + jieba are installed, else None.""" retriever = BM25Retriever(vector_index=get_vector_index()) return retriever if retriever.available else None @lru_cache def get_retrieval_service() -> KnowledgeRetrievalService: """Return retrieval service.""" retriever = DenseRetriever( embedding_provider=get_embedding_provider(), vector_index=get_vector_index(), ) return KnowledgeRetrievalService( retriever=retriever, bm25_retriever=get_bm25_retriever(), reranker=get_reranker(), reranker_top_k=settings.reranker_top_k, ) @lru_cache def get_document_command_service() -> DocumentCommandService: """Return document command service.""" return DocumentCommandService( document_repository=get_document_repository(), binary_store=get_binary_store(), parser=get_parser(), chunk_builder=get_chunk_builder(), embedding_provider=get_embedding_provider(), vector_index=get_vector_index(), parse_artifact_store=get_parse_artifact_store(), ) @lru_cache def get_document_query_service() -> DocumentQueryService: """Return document query service.""" return DocumentQueryService( document_repository=get_document_repository(), binary_store=get_binary_store(), vector_index=get_vector_index(), ) @lru_cache def get_conversation_store() -> InMemoryConversationStore: """Return conversation store.""" return InMemoryConversationStore( max_sessions=settings.session_max_sessions, timeout_minutes=settings.session_timeout_minutes, ) @lru_cache def get_agent_conversation_service() -> AgentConversationService: """Return agent conversation service.""" return AgentConversationService( retrieval_service=get_retrieval_service(), answer_generator=OpenAICompatibleAnswerGenerator(), conversation_store=get_conversation_store(), ) @lru_cache def get_perception_service() -> PerceptionService: """Return perception service for regulatory intelligence.""" return PerceptionService( event_store=MockEventStore(), retrieval_service=get_retrieval_service(), ) @lru_cache def get_agent_session_service() -> AgentSessionService: """Return agent session service.""" return AgentSessionService(conversation_store=get_conversation_store()) def preload_runtime_dependencies() -> None: """Warm dependencies that are safe and useful to preload during startup.""" LLMFactory.preload_clients(["qwen", "deepseek"]) def cleanup_runtime_dependencies() -> None: """Release runtime dependencies that expose explicit cleanup hooks.""" LLMFactory.cleanup()