diff --git a/AGENTS.md b/AGENTS.md index cf2c713..8a1904d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -39,6 +39,15 @@ - `tests/verify_mvp.py` also expects the `BGEM3Embedder` stack to be available and explicitly mentions `FlagEmbedding`. - For backend-only changes, prefer focused import/startup checks unless you know the external services and model dependencies are available. +## Backend Architecture Authority + +- `docs/architecture/backend-project-architecture.md` is the authoritative backend architecture document for ongoing backend development. +- New backend business logic must follow `api -> application -> domain ports -> infrastructure`. +- Treat `backend/app/shared/bootstrap.py` as the current composition root for backend dependency wiring. +- Do not add new business orchestration to `backend/app/services/*` or `backend/app/workflows/*` unless the task is explicitly a migration step. +- API routes must not directly access `ConversationStore`; session access should go through application services. +- Legacy files may be patched for compatibility or bug fixes, but should not gain new long-term responsibilities. + ## Backend Commenting Standard - All comments and docstrings in `backend/**/*.py` must be written in English. diff --git a/README.md b/README.md index f494ddd..ce90eb9 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,10 @@ AIRegulation-DocAnalysis-Demo/ │ │ ├── application/ # 用例编排层 │ │ ├── domain/ # 领域模型与稳定端口 │ │ ├── infrastructure/ # MinIO / Milvus / 阿里云 / embedding / session 适配 +│ │ ├── shared/ # 组合根、配置无关 wiring 与横切支撑 │ │ ├── config/ # 配置与日志 +│ │ ├── services/ # 迁移期 legacy façade,不是新增业务逻辑默认落点 +│ │ ├── workflows/ # 迁移期 legacy workflow,不是新增业务逻辑默认落点 │ │ └── workers/ │ ├── requirements.txt │ └── main.py @@ -64,6 +67,13 @@ docker-compose logs -f milvus ## API接口 +## Backend Architecture + +- Backend 架构规范文档:`docs/architecture/backend-project-architecture.md` +- Backend 迁移 RFC:`docs/rfc/backend-api-parsing-embedding-migration-requirements.md` +- 后续 backend 新增功能、重构和技术替换必须同时满足 RFC 与架构文档。 +- `backend/app/services/*` 与 `backend/app/workflows/*` 当前属于迁移期遗留目录,除迁移或兼容修复外,不应继续承载新的业务编排。 + ### 上传文档 ```bash diff --git a/backend/README.md b/backend/README.md index 5a7bfeb..4f66933 100644 --- a/backend/README.md +++ b/backend/README.md @@ -2,6 +2,13 @@ `backend` 是当前正式使用的 FastAPI 后端目录,入口为 `app.main:app`。 +## 架构约束入口 + +- Backend authoritative architecture 文档:`docs/architecture/backend-project-architecture.md` +- Backend migration RFC:`docs/rfc/backend-api-parsing-embedding-migration-requirements.md` +- 后续 backend 新增功能和重构默认遵守:`api -> application -> domain ports -> infrastructure` +- `backend/app/services/*` 与 `backend/app/workflows/*` 为迁移期 legacy 目录,除迁移或兼容修复外,不应新增业务编排逻辑。 + ## 启动 ```bash @@ -34,10 +41,15 @@ PYTHONPATH=backend uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload ```text backend/ ├── app/ -│ ├── api/ # FastAPI 路由与模型 -│ ├── config/ # 配置与日志 -│ ├── services/ # 文档处理、LLM、RAG、存储 -│ └── workers/ # 任务相关代码 +│ ├── api/ # FastAPI 路由与 transport models +│ ├── application/ # 用例编排层 +│ ├── domain/ # 核心业务模型与稳定端口 +│ ├── infrastructure/ # 外部系统适配器 +│ ├── shared/ # composition root 与横切支撑 +│ ├── config/ # 配置与日志 +│ ├── services/ # legacy façade / 兼容入口 +│ ├── workflows/ # legacy workflow 入口 +│ └── workers/ # 任务相关代码 ├── .env.example ├── requirements.txt └── main.py @@ -46,4 +58,13 @@ backend/ ## 说明 - 路由前缀保持为 `/api/v1`,以兼容当前前端。 -- 原 `backend/app/api/routes/docs.py`、`rag.py`、`compliance.py`、`status.py` 仍保留在仓库中,但不再作为主路由入口。 +- 当前主业务链路入口是 `documents`、`knowledge`、`agent`。 +- `compliance.py` 当前仍被挂载,但尚未满足目标架构约束;在迁移前不应继续扩展业务编排。 +- `docs.py` 与 `rag.py` 为遗留/非主入口,不应继续扩展。 + +## 开发约束 + +- backend 开发前先阅读 `docs/architecture/backend-project-architecture.md`。 +- 新增业务能力默认落在 `application` 层,由 `api` 调用,不要直接写进 route。 +- route 不应直接访问 MinIO、Milvus、Parser SDK、LLM SDK 或 `ConversationStore`。 +- `backend/app/shared/bootstrap.py` 是当前 composition root;依赖装配优先收口到这里。 diff --git a/backend/app/api/routes/agent.py b/backend/app/api/routes/agent.py index 01af248..c0349fc 100644 --- a/backend/app/api/routes/agent.py +++ b/backend/app/api/routes/agent.py @@ -65,7 +65,7 @@ async def chat_with_session(request: ChatRequest): model=request.model or settings.llm_model, top_k=request.top_k or settings.rag_top_k, ) - session = get_conversation_store().get_session(session_id) + session = get_agent_session_service().get_session(session_id) return ChatResponse( session_id=session_id, answer=result.answer, @@ -133,45 +133,52 @@ async def chat_stream(request: ChatRequest): @router.get("/session/{session_id}", response_model=SessionInfo) async def get_session_info(session_id: str): """Return session info.""" - session = get_conversation_store().get_session(session_id) - if not session: - raise HTTPException(status_code=404, detail="会话不存在或已过期") - return SessionInfo( - session_id=session.session_id, - message_count=len(session.messages), - created_at=session.created_at, - updated_at=session.updated_at, - ) + try: + session = get_agent_session_service().get_session(session_id) + return SessionInfo( + session_id=session.session_id, + message_count=len(session.messages), + created_at=session.created_at, + updated_at=session.updated_at, + ) + except ValueError as exc: + raise HTTPException(status_code=404, detail=str(exc)) @router.get("/session/{session_id}/history") async def get_session_history(session_id: str, max_turns: int = 5): """Return session history.""" - session = get_conversation_store().get_session(session_id) - if not session: - raise HTTPException(status_code=404, detail="会话不存在或已过期") - history = [{"role": msg.role, "content": msg.content} for msg in session.messages[-(max_turns * 2):]] - return {"session_id": session_id, "history": history} + try: + history = get_agent_session_service().get_history(session_id=session_id, max_turns=max_turns) + return {"session_id": session_id, "history": history} + except ValueError as exc: + raise HTTPException(status_code=404, detail=str(exc)) @router.delete("/session/{session_id}") async def delete_session(session_id: str): """Delete session.""" - if not get_conversation_store().delete_session(session_id): - raise HTTPException(status_code=404, detail="会话不存在") - return {"message": "会话已删除", "session_id": session_id} + try: + get_agent_session_service().delete_session(session_id) + return {"message": "会话已删除", "session_id": session_id} + except ValueError as exc: + raise HTTPException(status_code=404, detail=str(exc)) @router.get("/sessions", response_model=List[SessionInfo]) async def list_sessions(): """List sessions.""" - return [SessionInfo(**item) for item in get_conversation_store().list_sessions()] + return [SessionInfo(**item) for item in get_agent_session_service().list_sessions()] @router.post("/feedback") async def submit_feedback(request: FeedbackRequest): """Submit feedback.""" - session = get_conversation_store().get_session(request.session_id) - if not session: - raise HTTPException(status_code=404, detail="会话不存在") - return {"message": "反馈已提交", "session_id": request.session_id, "message_index": request.message_index} + try: + result = get_agent_session_service().submit_feedback( + session_id=request.session_id, + message_index=request.message_index, + ) + return {"message": "反馈已提交", "session_id": result.session_id, "message_index": result.message_index} + except ValueError as exc: + raise HTTPException(status_code=404, detail=str(exc)) diff --git a/backend/app/application/agent/__init__.py b/backend/app/application/agent/__init__.py index b410310..7aaf8b6 100644 --- a/backend/app/application/agent/__init__.py +++ b/backend/app/application/agent/__init__.py @@ -1,7 +1,7 @@ """Initialize the app.application.agent package.""" -from .services import AgentConversationService +from .services import AgentConversationService, AgentSessionFeedbackResult, AgentSessionService # Keep package boundaries explicit so backend imports stay predictable. -__all__ = ["AgentConversationService"] +__all__ = ["AgentConversationService", "AgentSessionFeedbackResult", "AgentSessionService"] diff --git a/backend/app/application/agent/services.py b/backend/app/application/agent/services.py index 077c257..d8d4d46 100644 --- a/backend/app/application/agent/services.py +++ b/backend/app/application/agent/services.py @@ -1,7 +1,8 @@ -"""Implement application-layer logic for services.""" +"""Implement application-layer logic for agent services.""" from __future__ import annotations +from dataclasses import dataclass from typing import Generator from app.domain.conversation import AnswerGenerator, AnswerResult, ConversationStore @@ -141,5 +142,50 @@ class AgentConversationService: content=full_answer, sources=sources_payload, ) - + return session.session_id, event_stream() + + +@dataclass +class AgentSessionFeedbackResult: + """Represent the result of storing session feedback.""" + + session_id: str + message_index: int + + +class AgentSessionService: + """Provide application-layer access to session management workflows.""" + + def __init__(self, *, conversation_store: ConversationStore) -> None: + """Initialize the Agent Session Service instance.""" + self.conversation_store = conversation_store + + def get_session(self, session_id: str): + """Return a session by id or raise when it does not exist.""" + session = self.conversation_store.get_session(session_id) + if not session: + raise ValueError("会话不存在或已过期") + return session + + def get_history(self, *, session_id: str, max_turns: int = 5) -> list[dict[str, str]]: + """Return the recent conversation history for a session.""" + session = self.get_session(session_id) + return [{"role": msg.role, "content": msg.content} for msg in session.messages[-(max_turns * 2):]] + + def delete_session(self, session_id: str) -> None: + """Delete a session or raise when it does not exist.""" + if not self.conversation_store.delete_session(session_id): + raise ValueError("会话不存在") + + def list_sessions(self) -> list[dict]: + """Return the list of visible sessions.""" + return self.conversation_store.list_sessions() + + def submit_feedback(self, *, session_id: str, message_index: int) -> AgentSessionFeedbackResult: + """Validate feedback targets and return a normalized feedback result.""" + session = self.get_session(session_id) + if message_index < 0 or message_index >= len(session.messages): + raise ValueError("消息索引不存在") + # Preserve the existing API behavior until a persistent feedback store is introduced. + return AgentSessionFeedbackResult(session_id=session_id, message_index=message_index) diff --git a/backend/app/shared/bootstrap.py b/backend/app/shared/bootstrap.py index a0a18b6..47947f1 100644 --- a/backend/app/shared/bootstrap.py +++ b/backend/app/shared/bootstrap.py @@ -4,7 +4,7 @@ from __future__ import annotations from functools import lru_cache -from app.application.agent import AgentConversationService +from app.application.agent import AgentConversationService, AgentSessionService from app.application.documents import DocumentCommandService, DocumentQueryService from app.application.knowledge import KnowledgeRetrievalService from app.config.settings import settings @@ -162,3 +162,6 @@ def get_perception_service() -> PerceptionService: event_store=MockEventStore(), retrieval_service=get_retrieval_service(), ) +def get_agent_session_service() -> AgentSessionService: + """Return agent session service.""" + return AgentSessionService(conversation_store=get_conversation_store()) diff --git a/docs/architecture/backend-project-architecture.md b/docs/architecture/backend-project-architecture.md index 6a0d9a2..d968fd9 100644 --- a/docs/architecture/backend-project-architecture.md +++ b/docs/architecture/backend-project-architecture.md @@ -10,6 +10,31 @@ - 本文档负责冻结目标模块边界、依赖规则和实现组织方式。 - 后续任何代码重构、能力替换或底座升级,都应同时满足 RFC 与本文档。 +## 1.1 Document Status And Authority + +本文档不是仅供参考的“目标态草案”,而是当前 backend 持续开发的强制架构基线。 + +- 新增 backend 功能默认必须遵守本文档定义的模块边界与依赖方向。 +- 历史实现、迁移中代码和兼容 façade 的存在,不构成继续偏离本文档的理由。 +- 当现状与本文档冲突时,新增代码按本文档落位;旧代码按迁移计划逐步收口,但不允许继续扩大 legacy 边界。 +- 评审、重构验收和后续架构讨论,均以本文档作为 backend 内部结构的 authority。 + +## 1.2 Authoritative Scope + +本文档约束的 backend 范围包括: + +- `backend/app/api/*` +- `backend/app/application/*` +- `backend/app/domain/*` +- `backend/app/infrastructure/*` +- `backend/app/shared/*` + +说明: + +- `backend/app/services/*` 与 `backend/app/workflows/*` 当前属于迁移期 legacy 目录,不是新增业务逻辑的默认落点。 +- `backend/app/api/routes/docs.py` 与 `backend/app/api/routes/rag.py` 视为遗留或非主入口,除迁移、兼容或下线动作外,不应继续扩展。 +- `backend/app/api/routes/compliance.py` 当前仍对外暴露,但尚未完全满足本文档约束;在迁移到 application service 之前,应视为受控 legacy 入口,而不是新的架构样板。 + ## 2. Current-State Problems 基于当前代码,后端已经具备以下能力: @@ -22,6 +47,18 @@ 但这些能力当前主要是“可运行”,还不是“结构清晰、便于替换、便于演进”的状态。核心问题如下。 +### 2.0 Current-State Verdict + +基于当前仓库,现状裁决如下: + +- 已基本符合:`documents` 上传/查询主链路已经通过 `DocumentCommandService` 与 `DocumentQueryService` 收口。 +- 已基本符合:`knowledge` 检索已经通过 `KnowledgeRetrievalService` 统一对外暴露。 +- 已基本符合:`agent` 问答主链路已经通过 `AgentConversationService` 收口,`shared/bootstrap.py` 已承担 composition root 角色。 +- 部分符合:Agent session 详情、历史、删除、反馈等接口曾经直接访问 `ConversationStore`,需要继续收口到 application service。 +- 未完全符合:`compliance` 路由仍直接处理文件落盘、任务状态和 mock 结果,不符合 `api -> application -> domain ports -> infrastructure`。 +- 未完全符合:部分 `infrastructure` adapter 仍依赖 `services/*` 内的 legacy 实现,说明迁移尚未彻底完成。 +- 未完全符合:`api/main.py` 的生命周期预热逻辑仍直接依赖旧 LLM factory,尚未完全回到统一 wiring 边界。 + ### 2.1 `DocumentProcessor` 责任过载 现状判断: @@ -603,6 +640,7 @@ infrastructure -> external systems - `application` 只能依赖 `domain`、端口接口,以及通过 composition root 注入进来的实现实例 - `domain` 不能依赖 `api` 或 `infrastructure` - `infrastructure` 可以依赖 `domain` 定义的端口和数据模型,但不能反向驱动 application 逻辑 +- `api/main.py` 这类应用入口可以保留轻量 startup/shutdown 生命周期代码,但不应长期直接依赖 legacy service factory;预热与装配逻辑应逐步收口到明确的 wiring 边界 说明: @@ -739,6 +777,54 @@ infrastructure -> external systems - 内部 DTO / VO / domain object 收敛到 `application` 或 `domain` - 不允许 API model 直接渗透到 domain +### 10.10 应用入口与启动生命周期 + +当前: + +- `backend/app/api/main.py` + +目标: + +- 保留 FastAPI app、middleware 和 lifespan 入口职责 +- 逐步去除对 legacy LLM factory 的直接依赖 +- 预热、清理和依赖装配应保持在明确的 wiring / bootstrap 边界内,而不是继续把旧 service factory 固化为应用入口依赖 + +### 10.11 Compliance 路由 + +当前: + +- `backend/app/api/routes/compliance.py` + +目标: + +- 如继续保留该能力,应迁移到独立的 application service 与稳定端口 +- 在迁移完成前,该路由视为受控 legacy 入口,可修 bug,但不应继续扩展业务编排职责 + +### 10.12 遗留路由入口 + +当前: + +- `backend/app/api/routes/docs.py` +- `backend/app/api/routes/rag.py` + +目标: + +- 作为遗留或演示入口逐步归档、下线或迁移 +- 不再作为新增 backend 能力的开发入口 + +### 10.13 Legacy Workflow 与 Service 目录 + +当前: + +- `backend/app/workflows/*` +- `backend/app/services/*` + +目标: + +- 保留迁移期兼容价值,但不再承载新的长期业务编排 +- 若某个 legacy 实现仍被 `infrastructure` adapter 间接复用,应视为过渡依赖,后续逐步迁入 `infrastructure` 或更稳定的底层支撑模块 +- 任何新增 backend 业务能力,都不应再以这些目录作为默认落点 + ## 11. Technology Replacement Boundaries ### 11.1 本地解析 / MinerU -> 阿里云文档解析 @@ -790,6 +876,10 @@ infrastructure -> external systems - 禁止新建第二个“大一统流程类”替代 `DocumentProcessor` - 禁止 `knowledge` 和 `agent` 各自维护独立检索实现 - 禁止 parser、embedding、vector index、llm provider 的替换穿透到 API 层 +- 禁止新增 route 直接访问 `ConversationStore` +- 禁止新增代码把 `backend/app/services/*` 或 `backend/app/workflows/*` 作为默认业务落点 +- 禁止新增 `infrastructure -> services/*` 的过渡依赖;已有依赖只允许在迁移窗口内逐步消除,不允许继续扩散 +- 禁止在 README、开发说明或评审结论中把 legacy 目录描述为当前 backend 的主结构 ## 13. Architecture Review Checklist @@ -807,3 +897,7 @@ infrastructure -> external systems 10. 是否明确 `knowledge` 与 `agent` 共用同一 retrieval 底座。 11. 是否明确 API 层只负责 transport concerns,不再直接承担业务编排。 12. 是否保证后续替换方案时,上层 application service 与外部 API 契约不被迫变化。 +13. 是否仍存在 route 直接访问 `ConversationStore`、文件系统、对象存储或任务状态存储。 +14. 是否新增了 `infrastructure -> services/*` 依赖。 +15. 是否把新的 backend 业务逻辑写进了 `services/*` 或 `workflows/*`。 +16. README、backend README 与协作说明是否仍与当前 authoritative architecture 保持一致。