""" Post-migration backend smoke checks. Purpose: 1. Verify the new architecture modules can be imported 2. Verify migration-critical config matches the RFC 3. Verify external dependencies when they are available 4. Optionally verify the real ingest path with a sample document """ from __future__ import annotations import argparse import sys from pathlib import Path PROJECT_ROOT = Path(__file__).resolve().parent.parent sys.path.insert(0, str(PROJECT_ROOT / "backend")) from loguru import logger from app.config.logging import setup_logging from app.config.settings import settings from app.shared.bootstrap import ( get_document_command_service, get_retrieval_service, get_vector_index, ) setup_logging(level="INFO") def verify_service_wiring() -> bool: """Verify the new module layout and service entrypoints can be imported.""" logger.info("=" * 60) logger.info("Step 1: verify module wiring") logger.info("=" * 60) try: from app.api.main import app from app.application.agent import AgentConversationService from app.application.documents import DocumentCommandService, DocumentQueryService from app.application.knowledge import KnowledgeRetrievalService from app.shared import bootstrap assert app is not None assert DocumentCommandService is not None assert DocumentQueryService is not None assert KnowledgeRetrievalService is not None assert AgentConversationService is not None assert bootstrap is not None logger.success("module wiring ok") return True except Exception as exc: logger.error(f"module wiring failed: {exc}") return False def verify_migration_config() -> bool: """Verify migration-critical config values.""" logger.info("=" * 60) logger.info("Step 2: verify migration config") logger.info("=" * 60) try: assert settings.embedding_model == "text-embedding-v3" assert settings.embedding_dim == 1024 assert settings.milvus_collection == "regulations_dense_1024_v2" assert settings.parser_backend == "aliyun" assert settings.chunk_backend == "aliyun" logger.info(f"embedding_model={settings.embedding_model}") logger.info(f"embedding_base_url={settings.embedding_base_url}") logger.info(f"embedding_dim={settings.embedding_dim}") logger.info(f"milvus_collection={settings.milvus_collection}") logger.info(f"parser_backend={settings.parser_backend}") logger.info(f"chunk_backend={settings.chunk_backend}") logger.success("migration config ok") return True except Exception as exc: logger.error(f"migration config mismatch: {exc}") return False def verify_minio_connection() -> bool: """Verify MinIO connectivity for the binary store path.""" logger.info("=" * 60) logger.info("Step 3: verify MinIO connection") logger.info("=" * 60) try: binary_store = get_document_command_service().binary_store assert binary_store is not None logger.success("MinIO connection ok") return True except Exception as exc: logger.error(f"MinIO connection failed: {exc}") logger.info("start MinIO first or update .env storage settings") return False def verify_milvus_connection() -> bool: """Verify dense-only Milvus adapter connectivity.""" logger.info("=" * 60) logger.info("Step 4: verify Milvus connection") logger.info("=" * 60) try: health = get_vector_index().health() logger.info(f"Milvus health: {health}") logger.success("Milvus connection ok") return True except Exception as exc: logger.error(f"Milvus connection failed: {exc}") logger.info("start Milvus first or update .env vector settings") return False def verify_ingest_pipeline(sample_file: Path) -> bool: """Verify upload -> parse -> embed -> index using a real file.""" logger.info("=" * 60) logger.info("Step 5: verify real ingest pipeline") logger.info("=" * 60) if not sample_file.exists(): logger.error(f"sample file not found: {sample_file}") return False if sample_file.suffix.lower() not in {".pdf", ".doc", ".docx"}: logger.error("sample file must be PDF, DOC, or DOCX") return False if not settings.alibaba_access_key_id or not settings.alibaba_access_key_secret: logger.error("missing Aliyun parser credentials") return False try: result = get_document_command_service().upload_and_process( file_name=sample_file.name, content=sample_file.read_bytes(), content_type=_guess_content_type(sample_file), doc_name=sample_file.stem, regulation_type="smoke-test", version="migration", generate_summary=False, ) logger.info(f"process result: doc_id={result.doc_id}, status={result.status}, chunks={result.num_chunks}") if result.status != "indexed": logger.error(f"ingest failed: {result.message}") return False retrieval_results = get_retrieval_service().retrieve( query=sample_file.stem, top_k=3, filters=f'doc_id == "{result.doc_id}"', ) logger.info(f"retrieval count: {len(retrieval_results)}") logger.success("real ingest pipeline ok") return True except Exception as exc: logger.error(f"real ingest pipeline failed: {exc}") return False def _guess_content_type(sample_file: Path) -> str: suffix = sample_file.suffix.lower() if suffix == ".pdf": return "application/pdf" if suffix == ".doc": return "application/msword" if suffix == ".docx": return "application/vnd.openxmlformats-officedocument.wordprocessingml.document" return "application/octet-stream" def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Verify the migrated backend path") parser.add_argument("--sample-file", type=Path, help="Optional PDF/DOC/DOCX for real ingest verification") return parser.parse_args() def main() -> bool: args = parse_args() results = [ ("module_wiring", verify_service_wiring()), ("migration_config", verify_migration_config()), ("minio_connection", verify_minio_connection()), ("milvus_connection", verify_milvus_connection()), ] if args.sample_file: results.append(("real_ingest_pipeline", verify_ingest_pipeline(args.sample_file))) else: logger.info("no sample file provided; skip real ingest check") logger.info("\n" + "=" * 60) logger.info("check summary") logger.info("=" * 60) all_passed = True for name, passed in results: status = "PASS" if passed else "FAIL" logger.info(f"{name}: {status}") if not passed: all_passed = False if all_passed: logger.success("all executed checks passed") else: logger.warning("some checks failed; inspect environment dependencies") return all_passed if __name__ == "__main__": sys.exit(0 if main() else 1)