1. Add 登录功能

2. 调整字体大小
3. 新增部分功能
2026-06-05 18:00:31 +08:00
parent 06e0967128
commit 9fea9c6a53
58 changed files with 5028 additions and 322 deletions
--- a/backend/app/infrastructure/tasks/__init__.py
+++ b/backend/app/infrastructure/tasks/__init__.py
@@ -0,0 +1,5 @@
+"""Celery task definitions for background processing.
+
+This package exposes the shared Celery application instance and all
+registered task functions used by API routes to enqueue work.
+"""
--- a/backend/app/infrastructure/tasks/celery_app.py
+++ b/backend/app/infrastructure/tasks/celery_app.py
@@ -0,0 +1,45 @@
+"""Shared Celery application instance for background task processing.
+
+All workers and enqueueing call sites import `celery_app` from this module
+so the broker/backend configuration stays in one place.
+"""
+
+from __future__ import annotations
+
+from celery import Celery
+
+from app.config.settings import settings
+
+
+def _redis_url() -> str:
+    """Return the Redis connection URL built from application settings.
+
+    Reads ``redis_host``, ``redis_port`` and ``redis_db`` from ``settings``
+    and, when ``redis_password`` is truthy, embeds it as the URL password
+    (with an empty username).
+
+    The password is percent-encoded before being embedded so that reserved
+    characters such as ``@``, ``:``, ``/``, ``#`` or ``?`` cannot be mistaken
+    for URL delimiters and corrupt the host/port/db parts.
+
+    Returns:
+        A ``redis://`` URL of the form ``redis://[:password@]host:port/db``.
+    """
+    # Function-scope import keeps this change self-contained in the block.
+    from urllib.parse import quote
+
+    location = f"{settings.redis_host}:{settings.redis_port}/{settings.redis_db}"
+    if settings.redis_password:
+        # safe="" so that "/" is encoded too; URL consumers decode it again.
+        encoded_password = quote(str(settings.redis_password), safe="")
+        return f"redis://:{encoded_password}@{location}"
+    return f"redis://{location}"
+
+
+# Redis is used as both the message broker and the result backend; each URL
+# is produced by _redis_url() from the same application settings.
+_BROKER = _redis_url()
+_BACKEND = _redis_url()
+
+# The single shared Celery application. `include` names the task module so
+# that its tasks are registered with this app (assumes Celery's standard
+# `include` behaviour of importing the listed modules at worker start-up).
+celery_app = Celery(
+    "compliance_hub",
+    broker=_BROKER,
+    backend=_BACKEND,
+    include=["app.infrastructure.tasks.document_tasks"],
+)
+
+celery_app.conf.update(
+    # Task messages and results are serialized as JSON, and JSON is the only
+    # content type listed as accepted.
+    task_serializer="json",
+    result_serializer="json",
+    accept_content=["json"],
+    # Use UTC for Celery's date/time handling.
+    timezone="UTC",
+    enable_utc=True,
+    # Acknowledge a message after the task has run rather than on receipt, so
+    # work is not lost if the worker dies mid-task.
+    task_acks_late=True,
+    # NOTE(review): per the Celery docs this rejects (and thereby requeues) a
+    # late-acked task whose worker process was lost — confirm tasks are safe
+    # to run more than once.
+    task_reject_on_worker_lost=True,
+    # Keep results for 1 hour (3600 seconds) for status polling.
+    result_expires=3600,
+)
--- a/backend/app/infrastructure/tasks/document_tasks.py
+++ b/backend/app/infrastructure/tasks/document_tasks.py
@@ -0,0 +1,73 @@
+"""Celery tasks for document processing.
+
+Each task is a thin wrapper that retrieves the already-stored document
+binary and delegates to DocumentCommandService._process_document.
+The task does not accept raw file bytes — it reads them from the binary
+store using the doc_id, so the Celery message payload stays small.
+"""
+
+from __future__ import annotations
+
+from loguru import logger
+
+from app.infrastructure.tasks.celery_app import celery_app
+
+
+@celery_app.task(
+    name="app.infrastructure.tasks.document_tasks.process_document_task",
+    bind=True,
+    max_retries=3,
+    default_retry_delay=30,
+    acks_late=True,
+)
+def process_document_task(
+    self,
+    doc_id: str,
+    file_name: str,
+    doc_name: str,
+    regulation_type: str,
+    version: str,
+    generate_summary: bool,
+    run_id: str | None = None,
+) -> dict:
+    """Run the processing pipeline for a document whose binary is already stored.
+
+    Only identifiers and options travel in the task message. The worker looks
+    up the document record by ``doc_id``, reads the stored bytes through the
+    command service's binary store, and passes everything to the command
+    service's ``_process_document``.
+
+    Returns:
+        A dict with the keys ``doc_id``, ``status`` and ``num_chunks`` copied
+        from the processing result.
+
+    Raises:
+        Whatever ``self.retry`` raises. Every exception raised while
+        processing, including the ``ValueError`` for a missing document
+        record, is logged and handed to ``self.retry``; the task is declared
+        with 3 retries and a 30-second default delay.
+    """
+    # Deferred import: the original author notes this lets each worker
+    # process build its own bootstrap singletons.
+    from app.shared.bootstrap import (
+        get_document_command_service,
+        get_document_query_service,
+    )
+
+    logger.info("process_document_task started: doc_id={}", doc_id)
+    try:
+        command_service = get_document_command_service()
+        record = get_document_query_service().get(doc_id)
+        if not record:
+            raise ValueError(f"Document record not found: {doc_id}")
+
+        # Fetch the bytes here instead of shipping them in the task message.
+        stored_bytes = command_service.binary_store.read(record.object_name)
+
+        outcome = command_service._process_document(
+            doc_id=doc_id,
+            file_name=file_name,
+            final_doc_name=doc_name,
+            content=stored_bytes,
+            regulation_type=regulation_type,
+            version=version,
+            generate_summary=generate_summary,
+            run_id=run_id,
+        )
+        logger.info(
+            "process_document_task completed: doc_id={} status={} chunks={}",
+            doc_id,
+            outcome.status,
+            outcome.num_chunks,
+        )
+        payload = {
+            "doc_id": outcome.doc_id,
+            "status": outcome.status,
+            "num_chunks": outcome.num_chunks,
+        }
+        return payload
+
+    except Exception as err:
+        logger.exception("process_document_task failed: doc_id={}", doc_id)
+        # No distinction is made between transient and permanent failures:
+        # each one is retried until the configured retry budget is used up.
+        raise self.retry(exc=err)