2026-05-18 16:32:42 +08:00
|
|
|
"""新架构下的检索与 Milvus dense-only 约定测试。"""
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
from app.application.agent.services import AgentConversationService
|
|
|
|
|
from app.application.knowledge.services import KnowledgeRetrievalService
|
|
|
|
|
from app.domain.conversation.models import AnswerResult, AnswerSource, ConversationSession
|
|
|
|
|
from app.domain.retrieval import RetrievalQuery, RetrievedChunk
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FakeRetriever:
    """Retriever test double that records queries and returns one canned chunk."""

    def __init__(self) -> None:
        # Every query passed to ``retrieve`` is appended here, in call order,
        # so tests can inspect what was requested.
        self.queries: list[RetrievalQuery] = []

    def retrieve(self, query: RetrievalQuery) -> list[RetrievedChunk]:
        """Record ``query`` and return a single hard-coded chunk in a list."""
        self.queries.append(query)
        canned_chunk = RetrievedChunk(
            chunk_id="chunk-1",
            doc_id="doc-1",
            doc_title="测试法规",
            text="法规正文",
            score=0.91,
            section_title="第一章",
            page_start=1,
            metadata={"section_title": "第一章"},
        )
        return [canned_chunk]

    def search(self, query: str, top_k: int, filters: str | None = None) -> list[RetrievedChunk]:
        """Wrap the plain arguments in a ``RetrievalQuery`` and delegate to ``retrieve``."""
        request = RetrievalQuery(query=query, top_k=top_k, filters=filters)
        return self.retrieve(request)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FakeAnswerGenerator:
    """Answer-generator test double that produces deterministic results."""

    @staticmethod
    def _to_source(chunk: RetrievedChunk) -> AnswerSource:
        # Copy the chunk's fields one-to-one into an AnswerSource.
        return AnswerSource(
            doc_id=chunk.doc_id,
            doc_title=chunk.doc_title,
            chunk_id=chunk.chunk_id,
            section_title=chunk.section_title,
            page_start=chunk.page_start,
            score=chunk.score,
            text=chunk.text,
            metadata=chunk.metadata,
        )

    def generate(
        self,
        *,
        query: str,
        retrieved_chunks: list[RetrievedChunk],
        history: list[dict[str, str]] | None = None,
        provider: str | None = None,
        model: str | None = None,
        prompt_template: str | None = None,
    ) -> AnswerResult:
        """Build a canned ``AnswerResult`` for ``query``.

        The answer text echoes the query, every retrieved chunk becomes one
        source, and the latency / context-token figures are fixed numbers.
        ``history``, ``provider`` and ``prompt_template`` are accepted but
        not used. A falsy ``model`` falls back to a default model name.
        """
        answer_sources = [self._to_source(chunk) for chunk in retrieved_chunks]
        return AnswerResult(
            answer=f"回答: {query}",
            sources=answer_sources,
            model=model if model else "deepseek-v4-flash",
            latency_ms=12,
            retrieved_count=len(retrieved_chunks),
            context_tokens=128,
        )

    def stream_generate(self, **kwargs):
        """Yield a fixed event sequence: ``sources``, ``content``, then ``done``.

        The sources payload is derived from ``generate(**kwargs)``; the content
        text and the ``done`` payload are hard-coded.
        """
        result = self.generate(**kwargs)
        yield {"event": "sources", "data": [src.__dict__ for src in result.sources]}
        yield {"event": "content", "data": "流式回答"}
        yield {"event": "done", "data": {"retrieved_count": 1}}
|
2026-04-28 11:29:33 +08:00
|
|
|
|
|
|
|
|
|
2026-05-18 16:32:42 +08:00
|
|
|
class FakeConversationStore:
    """Dictionary-backed conversation store used as a test double."""

    def __init__(self) -> None:
        # Sessions keyed by their session id.
        self.sessions: dict[str, ConversationSession] = {}

    def create_session(self, metadata: dict | None = None) -> ConversationSession:
        """Create, register and return a session with the fixed id ``sess-1``."""
        new_session = ConversationSession(
            session_id="sess-1",
            created_at=1,
            updated_at=1,
            metadata=metadata or {},
        )
        self.sessions[new_session.session_id] = new_session
        return new_session

    def get_session(self, session_id: str) -> ConversationSession | None:
        """Return the stored session for ``session_id``, or ``None`` if unknown."""
        return self.sessions.get(session_id, None)

    def save_message(self, session_id: str, *, role: str, content: str, sources: list[dict] | None = None):
        """Append a message to the session and return the session.

        Returns ``None`` when the session does not exist. ``sources`` is
        accepted but not stored.
        """
        target = self.sessions.get(session_id)
        if target is not None:
            # Ad-hoc message type exposing ``role`` and ``content`` attributes.
            message_type = type("Msg", (), {"role": role, "content": content})
            target.messages.append(message_type())
        return target

    def delete_session(self, session_id: str) -> bool:
        """Remove the session; return whether something was actually removed."""
        removed = self.sessions.pop(session_id, None)
        return removed is not None

    def list_sessions(self) -> list[dict]:
        """Return one summary dict per stored session."""
        return [
            {
                "session_id": sid,
                "message_count": len(stored.messages),
                "created_at": stored.created_at,
                "updated_at": stored.updated_at,
            }
            for sid, stored in self.sessions.items()
        ]
|
2026-04-28 11:29:33 +08:00
|
|
|
|
|
|
|
|
|
2026-05-18 16:32:42 +08:00
|
|
|
def test_knowledge_retrieval_service_builds_retrieval_query():
    """The query text, top_k and filters given to the service reach the retriever."""
    filter_expr = 'doc_name == "测试法规"'
    fake_retriever = FakeRetriever()
    retrieval_service = KnowledgeRetrievalService(retriever=fake_retriever)

    chunks = retrieval_service.retrieve(query="机动车安全", top_k=3, filters=filter_expr)

    assert len(chunks) == 1
    # Inspect the query object the fake retriever recorded.
    recorded = fake_retriever.queries[0]
    assert recorded.query == "机动车安全"
    assert recorded.top_k == 3
    assert recorded.filters == filter_expr
|
2026-04-28 11:29:33 +08:00
|
|
|
|
|
|
|
|
|
2026-05-18 16:32:42 +08:00
|
|
|
def test_agent_conversation_service_reuses_shared_retrieval_service():
    """A chat call goes through the shared retrieval service and stores two messages."""
    fake_retriever = FakeRetriever()
    store = FakeConversationStore()
    agent_service = AgentConversationService(
        retrieval_service=KnowledgeRetrievalService(retriever=fake_retriever),
        answer_generator=FakeAnswerGenerator(),
        conversation_store=store,
    )

    session_id, result = agent_service.chat(query="问一个问题", top_k=2, model="qwen3.5-flash")

    assert session_id == "sess-1"
    assert result.answer == "回答: 问一个问题"
    assert result.retrieved_count == 1
    # The retriever behind the shared service saw the requested top_k.
    first_query = fake_retriever.queries[0]
    assert first_query.top_k == 2
    stored_messages = store.sessions["sess-1"].messages
    assert len(stored_messages) == 2
|