Files
AIRegulation-DocAnalysis/backend/app/shared/bootstrap.py

145 lines
4.9 KiB
Python
Raw Normal View History

"""Share backend wiring for bootstrap."""
from __future__ import annotations
from functools import lru_cache
from app.application.agent import AgentConversationService
from app.application.documents import DocumentCommandService, DocumentQueryService
from app.application.knowledge import KnowledgeRetrievalService
from app.config.settings import settings
from app.infrastructure.embedding.openai_compatible_embedding_provider import OpenAICompatibleEmbeddingProvider
from app.infrastructure.llm.openai_compatible_answer_generator import OpenAICompatibleAnswerGenerator
from app.infrastructure.parser.aliyun_document_parser import AliyunDocumentParser
from app.infrastructure.parser.local_chunk_builder import LocalRegulationChunkBuilder
from app.infrastructure.parser.local_document_parser import LocalDocumentParser
from app.infrastructure.parser.vector_chunk_builder import AliyunVectorChunkBuilder
from app.infrastructure.session.in_memory_conversation_store import InMemoryConversationStore
from app.infrastructure.storage.json_document_repository import JsonDocumentRepository
from app.infrastructure.storage.minio_binary_store import MinioDocumentBinaryStore
from app.infrastructure.storage.postgres_document_repository import PostgresDocumentRepository
from app.infrastructure.storage.postgres_parse_artifact_store import PostgresParseArtifactStore
from app.infrastructure.vectorstore.dense_retriever import DenseRetriever
from app.infrastructure.vectorstore.milvus_vector_index import MilvusVectorIndex
from app.infrastructure.vectorstore.cross_encoder_reranker import OpenAICompatibleReranker
# Keep shared wiring centralized so dependency construction remains consistent.
@lru_cache
def get_document_repository() -> (
    PostgresDocumentRepository | JsonDocumentRepository
):
    """Return the shared document repository.

    The backend is chosen by ``settings.document_repository_backend``: the
    exact value ``"postgres"`` selects ``PostgresDocumentRepository``; any
    other value falls back to ``JsonDocumentRepository`` constructed with
    ``settings.document_metadata_path``.

    Returns:
        The repository instance. ``lru_cache`` memoizes the first result, so
        the setting is only read on the first call and every later call gets
        the same object back.
    """
    if settings.document_repository_backend == "postgres":
        return PostgresDocumentRepository()
    return JsonDocumentRepository(settings.document_metadata_path)
@lru_cache
def get_parse_artifact_store() -> PostgresParseArtifactStore | None:
    """Return the shared parse artifact store, if one is configured.

    Returns:
        A ``PostgresParseArtifactStore`` when
        ``settings.document_repository_backend`` equals ``"postgres"``;
        otherwise ``None``. The result (including ``None``) is memoized by
        ``lru_cache``, so the setting is only read on the first call.
    """
    if settings.document_repository_backend == "postgres":
        return PostgresParseArtifactStore()
    return None
@lru_cache
def get_binary_store() -> MinioDocumentBinaryStore:
    """Provide the shared ``MinioDocumentBinaryStore`` instance.

    Returns:
        A store built with its default constructor on the first call;
        ``lru_cache`` hands the same object back on every later call.
    """
    store = MinioDocumentBinaryStore()
    return store
@lru_cache
def get_parser() -> AliyunDocumentParser | LocalDocumentParser:
    """Return the shared document parser.

    Returns:
        An ``AliyunDocumentParser`` when ``settings.parser_backend`` equals
        ``"aliyun"``; a ``LocalDocumentParser`` for any other value. The
        choice is made on the first call and then memoized by ``lru_cache``.
    """
    if settings.parser_backend == "aliyun":
        return AliyunDocumentParser()
    return LocalDocumentParser()
@lru_cache
def get_chunk_builder() -> AliyunVectorChunkBuilder | LocalRegulationChunkBuilder:
    """Return the shared chunk builder.

    Returns:
        An ``AliyunVectorChunkBuilder`` when ``settings.chunk_backend`` equals
        ``"aliyun"``; otherwise a ``LocalRegulationChunkBuilder`` configured
        with ``settings.chunk_size`` and ``settings.chunk_overlap``. The
        result is memoized by ``lru_cache``, so settings are only read on the
        first call.
    """
    if settings.chunk_backend == "aliyun":
        return AliyunVectorChunkBuilder()
    return LocalRegulationChunkBuilder(
        chunk_size=settings.chunk_size,
        chunk_overlap=settings.chunk_overlap,
    )
@lru_cache
def get_embedding_provider() -> OpenAICompatibleEmbeddingProvider:
    """Provide the shared ``OpenAICompatibleEmbeddingProvider`` instance.

    Returns:
        A provider built with its default constructor on the first call;
        ``lru_cache`` hands the same object back on every later call.
    """
    provider = OpenAICompatibleEmbeddingProvider()
    return provider
@lru_cache
def get_vector_index() -> MilvusVectorIndex:
    """Provide the shared ``MilvusVectorIndex`` instance.

    Returns:
        An index built with its default constructor on the first call;
        ``lru_cache`` hands the same object back on every later call.
    """
    index = MilvusVectorIndex()
    return index
@lru_cache
def get_reranker() -> OpenAICompatibleReranker | None:
    """Return the shared reranker, or ``None`` when reranking is off.

    Returns:
        An ``OpenAICompatibleReranker`` when ``settings.reranker_enabled`` and
        ``settings.reranker_base_url`` are both truthy; otherwise ``None``.
        The result (including ``None``) is memoized by ``lru_cache``, so the
        settings are only read on the first call.
    """
    # Both the feature flag and an endpoint URL are required.
    if settings.reranker_enabled and settings.reranker_base_url:
        return OpenAICompatibleReranker()
    return None
@lru_cache
def get_retrieval_service() -> KnowledgeRetrievalService:
    """Assemble the shared ``KnowledgeRetrievalService``.

    A ``DenseRetriever`` is built from the shared embedding provider and
    vector index, then handed to the service together with the optional
    reranker and ``settings.reranker_top_k``.

    Returns:
        The service instance; ``lru_cache`` reuses it after the first call.
    """
    dense_retriever = DenseRetriever(
        embedding_provider=get_embedding_provider(),
        vector_index=get_vector_index(),
    )
    # get_reranker() may return None; it is passed through unchanged.
    optional_reranker = get_reranker()
    top_k = settings.reranker_top_k
    return KnowledgeRetrievalService(
        retriever=dense_retriever,
        reranker=optional_reranker,
        reranker_top_k=top_k,
    )
@lru_cache
def get_document_command_service() -> DocumentCommandService:
    """Assemble the shared ``DocumentCommandService``.

    Every collaborator comes from the corresponding factory in this module,
    so the service receives the same instances those factories return
    elsewhere.

    Returns:
        The service instance; ``lru_cache`` reuses it after the first call.
    """
    # Dict entries are evaluated top to bottom, matching the keyword order
    # the service is constructed with.
    dependencies = {
        "document_repository": get_document_repository(),
        "binary_store": get_binary_store(),
        "parser": get_parser(),
        "chunk_builder": get_chunk_builder(),
        "embedding_provider": get_embedding_provider(),
        "vector_index": get_vector_index(),
        # May be None when the postgres backend is not selected.
        "parse_artifact_store": get_parse_artifact_store(),
    }
    return DocumentCommandService(**dependencies)
@lru_cache
def get_document_query_service() -> DocumentQueryService:
    """Assemble the shared ``DocumentQueryService``.

    The service is built from the shared document repository, binary store
    and vector index returned by the factories in this module.

    Returns:
        The service instance; ``lru_cache`` reuses it after the first call.
    """
    repository = get_document_repository()
    binaries = get_binary_store()
    index = get_vector_index()
    return DocumentQueryService(
        document_repository=repository,
        binary_store=binaries,
        vector_index=index,
    )
@lru_cache
def get_conversation_store() -> InMemoryConversationStore:
    """Provide the shared ``InMemoryConversationStore`` instance.

    Returns:
        A store configured with ``settings.session_max_sessions`` and
        ``settings.session_timeout_minutes``. ``lru_cache`` memoizes it, so
        the settings are only read on the first call.
    """
    session_limit = settings.session_max_sessions
    idle_timeout = settings.session_timeout_minutes
    return InMemoryConversationStore(
        max_sessions=session_limit,
        timeout_minutes=idle_timeout,
    )
@lru_cache
def get_agent_conversation_service() -> AgentConversationService:
    """Assemble the shared ``AgentConversationService``.

    The service is built from the shared retrieval service, a new
    ``OpenAICompatibleAnswerGenerator`` and the shared conversation store.

    Returns:
        The service instance; ``lru_cache`` reuses it after the first call.
    """
    retrieval = get_retrieval_service()
    # The generator has no factory of its own here; it is created inline and
    # lives as long as the cached service does.
    generator = OpenAICompatibleAnswerGenerator()
    conversations = get_conversation_store()
    return AgentConversationService(
        retrieval_service=retrieval,
        answer_generator=generator,
        conversation_store=conversations,
    )