Fix SSE route dependency and align architecture docs
This commit is contained in:
5
backend/app/application/__init__.py
Normal file
5
backend/app/application/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Initialize the app.application package."""
|
||||
# Nothing is re-exported from this package: the empty export list keeps
# package boundaries explicit so backend imports stay predictable.
__all__ = []
|
||||
7
backend/app/application/agent/__init__.py
Normal file
7
backend/app/application/agent/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""Initialize the app.application.agent package."""
|
||||
|
||||
from .services import AgentConversationService
|
||||
# Public surface of this package: only the conversation service is exported,
# which keeps package boundaries explicit and backend imports predictable.
__all__ = ["AgentConversationService"]
|
||||
145
backend/app/application/agent/services.py
Normal file
145
backend/app/application/agent/services.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""Implement application-layer logic for services."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Generator
|
||||
|
||||
from app.domain.conversation import AnswerGenerator, AnswerResult, ConversationStore
|
||||
from app.domain.retrieval import RetrievedChunk
|
||||
|
||||
from app.application.knowledge import KnowledgeRetrievalService
|
||||
# Keep orchestration logic centralized so use-case flow stays easy to trace.
|
||||
|
||||
|
||||
|
||||
class AgentConversationService:
    """Coordinate retrieval, answer generation and conversation persistence.

    The service keeps no state of its own; it wires together three
    collaborators supplied at construction time:

    * ``retrieval_service`` -- returns the chunks relevant to a query,
    * ``answer_generator`` -- produces an answer, blocking or streamed,
    * ``conversation_store`` -- loads/creates sessions and persists messages.
    """

    # Number of most recent session messages forwarded to the generator.
    _HISTORY_WINDOW = 10

    def __init__(
        self,
        *,
        retrieval_service: KnowledgeRetrievalService,
        answer_generator: AnswerGenerator,
        conversation_store: ConversationStore,
    ) -> None:
        """Store the collaborators used by every use case of this service."""
        self.retrieval_service = retrieval_service
        self.answer_generator = answer_generator
        self.conversation_store = conversation_store

    def ask(
        self,
        *,
        query: str,
        filters: str | None = None,
        provider: str | None = None,
        model: str | None = None,
        top_k: int = 5,
        prompt_template: str | None = None,
        session_id: str | None = None,
    ) -> tuple[str | None, AnswerResult]:
        """Answer one query, optionally inside an existing session.

        No session is created implicitly: without ``session_id`` the call is
        stateless (``history`` is ``None`` and nothing is persisted).

        Returns:
            ``(session_id, result)`` where ``session_id`` is ``None`` for a
            stateless call.

        Raises:
            ValueError: ``session_id`` was given but the store has no such
                session.
        """
        history = None
        active_session_id = None
        if session_id:
            session = self.conversation_store.get_session(session_id)
            if not session:
                raise ValueError("会话不存在或已过期")
            history = self._recent_history(session)
            active_session_id = session.session_id
            self.conversation_store.save_message(session_id, role="user", content=query)

        retrieved = self.retrieval_service.retrieve(query=query, top_k=top_k, filters=filters)
        result = self.answer_generator.generate(
            query=query,
            retrieved_chunks=retrieved,
            history=history,
            provider=provider,
            model=model,
            prompt_template=prompt_template,
        )
        if active_session_id:
            self._save_answer(active_session_id, result)
        return active_session_id, result

    def chat(
        self,
        *,
        query: str,
        session_id: str | None = None,
        filters: str | None = None,
        provider: str | None = None,
        model: str | None = None,
        top_k: int = 5,
    ) -> tuple[str, AnswerResult]:
        """Answer a query inside a session, creating the session if needed.

        A missing or unknown ``session_id`` silently starts a new session.

        Returns:
            ``(session_id, result)`` with the id of the session actually used.
        """
        session = self._get_or_create_session(session_id)
        # Snapshot history BEFORE persisting the user message (as ``ask``
        # does): the query is passed separately, so it must not also show up
        # in history when the store mutates the session object in place.
        history = self._recent_history(session)
        self.conversation_store.save_message(session.session_id, role="user", content=query)

        retrieved = self.retrieval_service.retrieve(query=query, top_k=top_k, filters=filters)
        result = self.answer_generator.generate(
            query=query,
            retrieved_chunks=retrieved,
            history=history,
            provider=provider,
            model=model,
        )
        self._save_answer(session.session_id, result)
        return session.session_id, result

    def stream_chat(
        self,
        *,
        query: str,
        session_id: str | None = None,
        filters: str | None = None,
        provider: str | None = None,
        model: str | None = None,
        top_k: int = 5,
        prompt_template: str | None = None,
    ) -> tuple[str, Generator[dict, None, None]]:
        """Streaming variant of :meth:`chat`.

        Session resolution, persistence of the user message and retrieval all
        happen eagerly, before this method returns. Answer generation and
        persistence of the assistant message happen lazily while the returned
        generator is consumed.

        Returns:
            ``(session_id, events)`` where ``events`` yields event dicts: first
            a ``status`` event, then every event from the answer generator.
        """
        session = self._get_or_create_session(session_id)
        # Same ordering as ``chat``: history first, then persist the query.
        history = self._recent_history(session)
        self.conversation_store.save_message(session.session_id, role="user", content=query)
        retrieved = self.retrieval_service.retrieve(query=query, top_k=top_k, filters=filters)

        def event_stream() -> Generator[dict, None, None]:
            """Relay generator events while collecting the answer to persist."""
            yield {"event": "status", "data": f"找到{len(retrieved)}条相关法规,正在生成回答..."}
            answer_parts: list[str] = []
            sources_payload: list[dict] = []
            for event in self.answer_generator.stream_generate(
                query=query,
                retrieved_chunks=retrieved,
                history=history,
                provider=provider,
                model=model,
                prompt_template=prompt_template,
            ):
                kind = event.get("event")
                if kind == "sources":
                    sources_payload = event.get("data", [])
                if kind == "content":
                    answer_parts.append(str(event.get("data", "")))
                yield event
            # NOTE(review): this runs only when the stream is consumed to the
            # end; if the consumer stops iterating early, no assistant message
            # is persisted for this turn.
            self.conversation_store.save_message(
                session.session_id,
                role="assistant",
                content="".join(answer_parts),
                sources=sources_payload,
            )

        return session.session_id, event_stream()

    def _get_or_create_session(self, session_id: str | None):
        """Return the stored session for ``session_id`` or a fresh one."""
        session = self.conversation_store.get_session(session_id) if session_id else None
        if session is None:
            session = self.conversation_store.create_session()
        return session

    @classmethod
    def _recent_history(cls, session) -> list[dict]:
        """Return the last ``_HISTORY_WINDOW`` messages as role/content dicts."""
        return [
            {"role": msg.role, "content": msg.content}
            for msg in session.messages[-cls._HISTORY_WINDOW:]
        ]

    def _save_answer(self, session_id: str, result: AnswerResult) -> None:
        """Persist a generated answer and its sources as an assistant message."""
        self.conversation_store.save_message(
            session_id,
            role="assistant",
            content=result.answer,
            sources=[source.__dict__ for source in result.sources],
        )
|
||||
7
backend/app/application/documents/__init__.py
Normal file
7
backend/app/application/documents/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""Initialize the app.application.documents package."""
|
||||
|
||||
from .services import DocumentCommandService, DocumentProcessResult, DocumentQueryService
|
||||
# Public surface of this package: the two document services and the result
# type, which keeps package boundaries explicit and backend imports predictable.
__all__ = ["DocumentCommandService", "DocumentProcessResult", "DocumentQueryService"]
|
||||
186
backend/app/application/documents/services.py
Normal file
186
backend/app/application/documents/services.py
Normal file
@@ -0,0 +1,186 @@
|
||||
"""Implement application-layer logic for services."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from app.domain.documents import (
|
||||
ChunkBuilder,
|
||||
Document,
|
||||
DocumentBinaryStore,
|
||||
DocumentParser,
|
||||
DocumentRepository,
|
||||
DocumentStatus,
|
||||
)
|
||||
from app.domain.retrieval import EmbeddingProvider, VectorIndex
|
||||
# Keep orchestration logic centralized so use-case flow stays easy to trace.
|
||||
|
||||
|
||||
|
||||
@dataclass
class DocumentProcessResult:
    """Outcome of one document processing run, as handed back to the caller."""

    # Identifier and display name of the processed document.
    doc_id: str
    doc_name: str
    # Final status value and a human-readable message describing the outcome.
    status: str
    message: str
    # Optional details; the defaults apply when the caller does not set them.
    num_chunks: int = 0
    summary: str = ""
    summary_latency_ms: int = 0
|
||||
|
||||
|
||||
class DocumentCommandService:
    """Run the write-side document pipeline: store, parse, chunk, embed, index."""

    def __init__(
        self,
        *,
        document_repository: DocumentRepository,
        binary_store: DocumentBinaryStore,
        parser: DocumentParser,
        chunk_builder: ChunkBuilder,
        embedding_provider: EmbeddingProvider,
        vector_index: VectorIndex,
    ) -> None:
        """Store the collaborators used by the processing pipeline."""
        self.document_repository = document_repository
        self.binary_store = binary_store
        self.parser = parser
        self.chunk_builder = chunk_builder
        self.embedding_provider = embedding_provider
        self.vector_index = vector_index

    def upload_and_process(
        self,
        *,
        doc_id: str | None = None,
        file_name: str,
        content: bytes,
        content_type: str,
        doc_name: str | None,
        regulation_type: str,
        version: str,
        generate_summary: bool,
    ) -> DocumentProcessResult:
        """Persist an uploaded file and index its chunks.

        The document record is created first; the repository status is then
        advanced STORED -> PARSED -> INDEXED as each stage completes. Any
        exception raised after the record exists is logged, recorded as
        FAILED with the error text, and reported through the returned result
        instead of being propagated.

        Args:
            doc_id: Identifier to use; when falsy, the first 8 characters of
                a random UUID4 are used.
            file_name: Original file name; also used for the object name and
                for the temp-file suffix handed to the parser.
            content: Raw file bytes.
            content_type: Content type forwarded to the binary store.
            doc_name: Display name; falls back to ``file_name`` when falsy.
            regulation_type: Forwarded to the document and the chunk builder.
            version: Forwarded to the document and the chunk builder.
            generate_summary: Only recorded in the document metadata here; the
                summary itself is persisted as an empty string.

        Returns:
            A ``DocumentProcessResult`` describing success or failure.
        """
        doc_id = doc_id or str(uuid.uuid4())[:8]
        final_doc_name = doc_name or file_name
        object_name = f"{doc_id}/{file_name}"

        document = Document(
            doc_id=doc_id,
            doc_name=final_doc_name,
            file_name=file_name,
            object_name=object_name,
            content_type=content_type,
            size_bytes=len(content),
            regulation_type=regulation_type,
            version=version,
            metadata={"generate_summary": generate_summary},
        )
        self.document_repository.create(document)

        temp_path = ""
        try:
            self.binary_store.save(
                object_name=object_name,
                data=content,
                content_type=content_type,
                metadata={"doc_id": doc_id},
            )
            self.document_repository.update_status(doc_id, DocumentStatus.STORED)

            # The parser works on a file path, so spill the bytes to disk.
            suffix = os.path.splitext(file_name)[1]
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
                # Record the path BEFORE writing: with delete=False the file
                # survives a failed write, and cleanup needs its path.
                temp_path = temp_file.name
                temp_file.write(content)

            parsed_document = self.parser.parse(
                file_path=temp_path,
                doc_id=doc_id,
                doc_name=final_doc_name,
            )
            self.document_repository.update_status(
                doc_id,
                DocumentStatus.PARSED,
                parser_name=parsed_document.parser_name,
                metadata={"structure_nodes": len(parsed_document.structure_nodes)},
            )

            chunks = self.chunk_builder.build(
                parsed_document=parsed_document,
                regulation_type=regulation_type,
                version=version,
            )
            if not chunks:
                raise ValueError("解析完成但没有生成可入库的 chunks")

            vectors = self.embedding_provider.embed_texts([chunk.embedding_text for chunk in chunks])
            inserted = self.vector_index.upsert(chunks, vectors)
            if inserted != len(chunks):
                # A count mismatch is only logged; processing continues.
                logger.warning("Milvus upsert count mismatched: inserted={}, chunks={}", inserted, len(chunks))

            self.document_repository.update_status(
                doc_id,
                DocumentStatus.INDEXED,
                chunk_count=len(chunks),
                summary="",
                summary_latency_ms=0,
                index_name=self.vector_index.health().get("collection_name", ""),
            )
            # Re-read the record so the result reflects what was persisted.
            stored = self.document_repository.get(doc_id)
            return DocumentProcessResult(
                doc_id=doc_id,
                doc_name=final_doc_name,
                status=(stored.status.value if stored else DocumentStatus.INDEXED.value),
                message="处理成功",
                num_chunks=len(chunks),
                summary=stored.summary if stored else "",
                summary_latency_ms=stored.summary_latency_ms if stored else 0,
            )
        except Exception as exc:
            # Pipeline boundary: convert any failure into a FAILED record and
            # a result object rather than propagating.
            logger.exception("文档处理失败: doc_id={}", doc_id)
            self.document_repository.update_status(
                doc_id,
                DocumentStatus.FAILED,
                error_message=str(exc),
            )
            return DocumentProcessResult(
                doc_id=doc_id,
                doc_name=final_doc_name,
                status=DocumentStatus.FAILED.value,
                message=f"文档处理失败: {exc}",
            )
        finally:
            self._remove_temp_file(temp_path)

    @staticmethod
    def _remove_temp_file(temp_path: str) -> None:
        """Best-effort removal of the parser's temp file; failures are logged."""
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                logger.warning("临时文件清理失败: {}", temp_path)
|
||||
|
||||
|
||||
class DocumentQueryService:
    """Read-side access to document records and their stored binaries."""

    def __init__(self, *, document_repository: DocumentRepository, binary_store: DocumentBinaryStore) -> None:
        """Keep references to the repository and the binary store."""
        self.binary_store = binary_store
        self.document_repository = document_repository

    def get(self, doc_id: str) -> Document | None:
        """Return whatever the repository holds for ``doc_id``."""
        found = self.document_repository.get(doc_id)
        return found

    def list_documents(self, limit: int | None = None) -> list[Document]:
        """Return the repository listing, forwarding ``limit`` unchanged."""
        listing = self.document_repository.list(limit=limit)
        return listing

    def download(self, doc_id: str) -> tuple[Document, bytes]:
        """Return the document record together with its stored bytes.

        Raises:
            FileNotFoundError: the repository has no record for ``doc_id``.
        """
        found = self.document_repository.get(doc_id)
        if found:
            payload = self.binary_store.read(found.object_name)
            return found, payload
        raise FileNotFoundError(f"文档不存在: {doc_id}")
|
||||
7
backend/app/application/knowledge/__init__.py
Normal file
7
backend/app/application/knowledge/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""Initialize the app.application.knowledge package."""
|
||||
|
||||
from .services import KnowledgeRetrievalService
|
||||
# Public surface of this package: only the retrieval service is exported,
# which keeps package boundaries explicit and backend imports predictable.
__all__ = ["KnowledgeRetrievalService"]
|
||||
19
backend/app/application/knowledge/services.py
Normal file
19
backend/app/application/knowledge/services.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""Implement application-layer logic for services."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.domain.retrieval import RetrievalQuery, Retriever, RetrievedChunk
|
||||
# Keep orchestration logic centralized so use-case flow stays easy to trace.
|
||||
|
||||
|
||||
|
||||
class KnowledgeRetrievalService:
    """Application-layer entry point for chunk retrieval."""

    def __init__(self, *, retriever: Retriever) -> None:
        """Remember the retriever that performs the actual lookup."""
        self.retriever = retriever

    def retrieve(self, *, query: str, top_k: int, filters: str | None = None) -> list[RetrievedChunk]:
        """Wrap the arguments in a ``RetrievalQuery`` and run it.

        Returns:
            Whatever the configured retriever returns for that query.
        """
        return self.retriever.retrieve(
            RetrievalQuery(query=query, top_k=top_k, filters=filters)
        )
|
||||
Reference in New Issue
Block a user