diff --git a/doris_mcp_server/main.py b/doris_mcp_server/main.py
index e5d42f8..95a34ea 100644
--- a/doris_mcp_server/main.py
+++ b/doris_mcp_server/main.py
@@ -618,6 +618,11 @@ Transport Modes:
 Examples:
   python -m doris_mcp_server --transport stdio
   python -m doris_mcp_server --transport http --host 0.0.0.0 --port 3000
+  python -m doris_mcp_server --transport stdio --doris-host localhost --doris-port 9030
+  python -m doris_mcp_server --transport http --doris-user admin --doris-database test_db
+  
+  # Backward compatibility: --db-* parameters are also supported
+  python -m doris_mcp_server --transport stdio --db-host localhost --db-port 9030
         """
     )
 
@@ -641,26 +646,26 @@ Examples:
     )
 
     parser.add_argument(
-        "--db-host",
+        "--doris-host", "--db-host",
         type=str,
-        default=os.getenv("DB_HOST", _default_config.database.host),
+        default=os.getenv("DORIS_HOST", _default_config.database.host),
         help=f"Doris database host address (default: {_default_config.database.host})",
     )
 
     parser.add_argument(
-        "--db-port", type=int, default=os.getenv("DB_PORT", _default_config.database.port), help=f"Doris database port number (default: {_default_config.database.port})"
+        "--doris-port", "--db-port", type=int, default=os.getenv("DORIS_PORT", _default_config.database.port), help=f"Doris database port number (default: {_default_config.database.port})"
     )
 
     parser.add_argument(
-        "--db-user", type=str, default=os.getenv("DB_USER", _default_config.database.user), help=f"Doris database username (default: {_default_config.database.user})"
+        "--doris-user", "--db-user", type=str, default=os.getenv("DORIS_USER", _default_config.database.user), help=f"Doris database username (default: {_default_config.database.user})"
     )
 
-    parser.add_argument("--db-password", type=str, default="", help="Doris database password")
+    parser.add_argument("--doris-password", "--db-password", type=str, default=os.getenv("DORIS_PASSWORD", ""), help="Doris database password")
 
     parser.add_argument(
-        "--db-database",
+        "--doris-database", "--db-database",
         type=str,
-        default=os.getenv("DB_DATABASE", _default_config.database.database),
+        default=os.getenv("DORIS_DATABASE", _default_config.database.database),
         help=f"Doris database name (default: {_default_config.database.database})",
     )
 
@@ -684,16 +689,19 @@ async def main():
     config = DorisConfig.from_env()  # First load from .env file and environment variables
     
     # Command line arguments override configuration (if provided)
-    if args.db_host != _default_config.database.host:  # If not default value, use command line argument
-        config.database.host = args.db_host
-    if args.db_port != _default_config.database.port:
-        config.database.port = args.db_port
-    if args.db_user != _default_config.database.user:
-        config.database.user = args.db_user
-    if args.db_password:  # Use password if provided
-        config.database.password = args.db_password
-    if args.db_database != _default_config.database.database:
-        config.database.database = args.db_database
+    # 🔧 FIX: Set transport from command line arguments
+    config.transport = args.transport
+    
+    if args.doris_host != _default_config.database.host:  # If not default value, use command line argument
+        config.database.host = args.doris_host
+    if args.doris_port != _default_config.database.port:
+        config.database.port = args.doris_port
+    if args.doris_user != _default_config.database.user:
+        config.database.user = args.doris_user
+    if args.doris_password:  # Use password if provided
+        config.database.password = args.doris_password
+    if args.doris_database != _default_config.database.database:
+        config.database.database = args.doris_database
     if args.log_level != _default_config.logging.level:
         config.logging.level = args.log_level
 
diff --git a/doris_mcp_server/tools/tools_manager.py b/doris_mcp_server/tools/tools_manager.py
index c702938..9f2aab9 100644
--- a/doris_mcp_server/tools/tools_manager.py
+++ b/doris_mcp_server/tools/tools_manager.py
@@ -61,7 +61,7 @@ class DorisToolsManager:
         # Initialize v0.5.0 advanced analytics tools
         self.data_governance_tools = DataGovernanceTools(connection_manager)
         self.data_exploration_tools = DataExplorationTools(connection_manager)
-        self.data_quality_tools = DataQualityTools(connection_manager)
+        self.data_quality_tools = DataQualityTools(connection_manager, connection_manager.config)
         self.security_analytics_tools = SecurityAnalyticsTools(connection_manager)
         self.dependency_analysis_tools = DependencyAnalysisTools(connection_manager)
         self.performance_analytics_tools = PerformanceAnalyticsTools(connection_manager)
@@ -464,41 +464,87 @@ class DorisToolsManager:
         
         # 🔄 Unified Data Quality Analysis Tool (New in v0.5.0)
         @mcp.tool(
-            "analyze_data_quality",
-            description="""[Function Description]: Comprehensive data quality analysis combining completeness and distribution analysis.
+            "get_table_basic_info",
+            description="""[Function Description]: Get basic information about a table including row count, column count, partitions, and size.
 
 [Parameter Content]:
 
 - table_name (string) [Required] - Name of the table to analyze
-- analysis_scope (string) [Optional] - Analysis scope, default is "comprehensive"
-  * "completeness": Only completeness analysis (null rates, business rules)
-  * "distribution": Only distribution analysis (statistical patterns)
-  * "comprehensive": Full analysis including both completeness and distribution
+- catalog_name (string) [Optional] - Target catalog name
+- db_name (string) [Optional] - Target database name
+""",
+        )
+        async def get_table_basic_info_tool(
+            table_name: str,
+            catalog_name: str = None,
+            db_name: str = None
+        ) -> str:
+            """Get table basic information"""
+            return await self.call_tool("get_table_basic_info", {
+                "table_name": table_name,
+                "catalog_name": catalog_name,
+                "db_name": db_name
+            })
+
+        @mcp.tool(
+            "analyze_columns",
+            description="""[Function Description]: Analyze completeness and distribution of specified columns in a table.
+
+[Parameter Content]:
+
+- table_name (string) [Required] - Name of the table to analyze
+- columns (array) [Required] - List of column names to analyze
+- analysis_types (array) [Optional] - Types of analysis to perform, default is ["both"]
+  * "completeness": Only completeness analysis (null rates, non-null counts)
+  * "distribution": Only distribution analysis (statistical patterns by data type)
+  * "both": Both completeness and distribution analysis
 - sample_size (integer) [Optional] - Maximum number of rows to sample, default is 100000
-- include_all_columns (boolean) [Optional] - Whether to analyze all columns, default is false
-- business_rules (array) [Optional] - Business rule validations in format [{"rule_name": "email_format", "sql_condition": "email REGEXP '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$'"}]
 - catalog_name (string) [Optional] - Target catalog name
 - db_name (string) [Optional] - Target database name
 - detailed_response (boolean) [Optional] - Whether to return detailed response including raw data, default is false
 """,
         )
-        async def analyze_data_quality_tool(
+        async def analyze_columns_tool(
             table_name: str,
-            analysis_scope: str = "comprehensive",
+            columns: List[str],
+            analysis_types: List[str] = None,
             sample_size: int = 100000,
-            include_all_columns: bool = False,
-            business_rules: List[dict] = None,
             catalog_name: str = None,
             db_name: str = None,
             detailed_response: bool = False
         ) -> str:
-            """Unified data quality analysis tool"""
-            return await self.call_tool("analyze_data_quality", {
+            """Analyze table columns"""
+            return await self.call_tool("analyze_columns", {
                 "table_name": table_name,
-                "analysis_scope": analysis_scope,
+                "columns": columns,
+                "analysis_types": analysis_types or ["both"],
                 "sample_size": sample_size,
-                "include_all_columns": include_all_columns,
-                "business_rules": business_rules,
+                "catalog_name": catalog_name,
+                "db_name": db_name,
+                "detailed_response": detailed_response
+            })
+
+        @mcp.tool(
+            "analyze_table_storage",
+            description="""[Function Description]: Analyze table's physical distribution and storage information.
+
+[Parameter Content]:
+
+- table_name (string) [Required] - Name of the table to analyze
+- catalog_name (string) [Optional] - Target catalog name
+- db_name (string) [Optional] - Target database name
+- detailed_response (boolean) [Optional] - Whether to return detailed response including raw data, default is false
+""",
+        )
+        async def analyze_table_storage_tool(
+            table_name: str,
+            catalog_name: str = None,
+            db_name: str = None,
+            detailed_response: bool = False
+        ) -> str:
+            """Analyze table storage"""
+            return await self.call_tool("analyze_table_storage", {
+                "table_name": table_name,
                 "catalog_name": catalog_name,
                 "db_name": db_name,
                 "detailed_response": detailed_response
@@ -721,7 +767,7 @@ No parameters required. Returns connection status, configuration, and diagnostic
             """Get ADBC connection information and status"""
             return await self.call_tool("get_adbc_connection_info", {})
 
-        logger.info("Successfully registered 23 tools to MCP server (14 basic + 7 advanced analytics + 2 ADBC tools)")
+        logger.info("Successfully registered 25 tools to MCP server (14 basic + 9 advanced analytics + 2 ADBC tools)")
 
     async def list_tools(self) -> List[Tool]:
         """List all available query tools (for stdio mode)"""
@@ -1064,20 +1110,14 @@ No parameters required. Returns connection status, configuration, and diagnostic
                 },
             ),
             # ==================== v0.5.0 Advanced Analytics Tools ====================
+            # Atomic Data Quality Analysis Tools
             Tool(
-                name="analyze_data_quality",
-                description="""[Function Description]: Comprehensive data quality analysis combining completeness and distribution analysis.
+                name="get_table_basic_info",
+                description="""[Function Description]: Get basic information about a table including row count, column count, partitions, and size.
 
 [Parameter Content]:
 
 - table_name (string) [Required] - Name of the table to analyze
-- analysis_scope (string) [Optional] - Analysis scope, default is "comprehensive"
-  * "completeness": Only completeness analysis (null rates, business rules)
-  * "distribution": Only distribution analysis (statistical patterns)
-  * "comprehensive": Full analysis including both completeness and distribution
-- sample_size (integer) [Optional] - Maximum number of rows to sample, default is 100000
-- include_all_columns (boolean) [Optional] - Whether to analyze all columns, default is false
-- business_rules (array) [Optional] - Business rule validations in format [{"rule_name": "email_format", "sql_condition": "email REGEXP '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$'"}]
 - catalog_name (string) [Optional] - Target catalog name
 - db_name (string) [Optional] - Target database name
 """,
@@ -1085,10 +1125,58 @@ No parameters required. Returns connection status, configuration, and diagnostic
                     "type": "object",
                     "properties": {
                         "table_name": {"type": "string", "description": "Name of the table to analyze"},
-                        "analysis_scope": {"type": "string", "enum": ["completeness", "distribution", "comprehensive"], "description": "Analysis scope", "default": "comprehensive"},
+                        "catalog_name": {"type": "string", "description": "Target catalog name"},
+                        "db_name": {"type": "string", "description": "Target database name"},
+                    },
+                    "required": ["table_name"],
+                },
+            ),
+            Tool(
+                name="analyze_columns",
+                description="""[Function Description]: Analyze completeness and distribution of specified columns in a table.
+
+[Parameter Content]:
+
+- table_name (string) [Required] - Name of the table to analyze
+- columns (array) [Required] - List of column names to analyze
+- analysis_types (array) [Optional] - Types of analysis to perform, default is ["both"]
+  * "completeness": Only completeness analysis (null rates, non-null counts)
+  * "distribution": Only distribution analysis (statistical patterns by data type)
+  * "both": Both completeness and distribution analysis
+- sample_size (integer) [Optional] - Maximum number of rows to sample, default is 100000
+- catalog_name (string) [Optional] - Target catalog name
+- db_name (string) [Optional] - Target database name
+- detailed_response (boolean) [Optional] - Whether to return detailed response including raw data, default is false
+""",
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "table_name": {"type": "string", "description": "Name of the table to analyze"},
+                        "columns": {"type": "array", "items": {"type": "string"}, "description": "List of column names to analyze"},
+                        "analysis_types": {"type": "array", "items": {"type": "string", "enum": ["completeness", "distribution", "both"]}, "description": "Types of analysis to perform", "default": ["both"]},
                         "sample_size": {"type": "integer", "description": "Maximum number of rows to sample", "default": 100000},
-                        "include_all_columns": {"type": "boolean", "description": "Whether to analyze all columns", "default": False},
-                        "business_rules": {"type": "array", "items": {"type": "object"}, "description": "Business rule validations"},
+                        "catalog_name": {"type": "string", "description": "Target catalog name"},
+                        "db_name": {"type": "string", "description": "Target database name"},
+                        "detailed_response": {"type": "boolean", "description": "Whether to return detailed response including raw data", "default": False},
+                    },
+                    "required": ["table_name", "columns"],
+                },
+            ),
+            Tool(
+                name="analyze_table_storage",
+                description="""[Function Description]: Analyze table's physical distribution and storage information.
+
+[Parameter Content]:
+
+- table_name (string) [Required] - Name of the table to analyze
+- catalog_name (string) [Optional] - Target catalog name
+- db_name (string) [Optional] - Target database name
+- detailed_response (boolean) [Optional] - Whether to return detailed response including raw data, default is false
+""",
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "table_name": {"type": "string", "description": "Name of the table to analyze"},
                         "catalog_name": {"type": "string", "description": "Target catalog name"},
                         "db_name": {"type": "string", "description": "Target database name"},
                         "detailed_response": {"type": "boolean", "description": "Whether to return detailed response including raw data", "default": False},
@@ -1096,7 +1184,6 @@ No parameters required. Returns connection status, configuration, and diagnostic
                     "required": ["table_name"],
                 },
             ),
-
             Tool(
                 name="trace_column_lineage",
                 description="""[Function Description]: Trace data lineage for specified columns through SQL analysis and dependency mapping.
@@ -1323,9 +1410,13 @@ No parameters required. Returns connection status, configuration, and diagnostic
             elif name == "get_historical_memory_stats":
                 arguments["data_type"] = "historical"
                 result = await self._get_memory_stats_tool(arguments)
-            # v0.5.0 Advanced Analytics Tools
-            elif name == "analyze_data_quality":
-                result = await self._analyze_data_quality_tool(arguments)
+            # v0.5.0 Advanced Analytics Tools - Atomic Data Quality Tools
+            elif name == "get_table_basic_info":
+                result = await self._get_table_basic_info_tool(arguments)
+            elif name == "analyze_columns":
+                result = await self._analyze_columns_tool(arguments)
+            elif name == "analyze_table_storage":
+                result = await self._analyze_table_storage_tool(arguments)
             elif name == "trace_column_lineage":
                 result = await self._trace_column_lineage_tool(arguments)
             elif name == "monitor_data_freshness":
@@ -1595,26 +1686,46 @@ No parameters required. Returns connection status, configuration, and diagnostic
     
     # ==================== v0.5.0 Advanced Analytics Tools Private Methods ====================
     
-    async def _analyze_data_quality_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
-        """Unified data quality analysis tool routing"""
+    async def _get_table_basic_info_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """Get table basic information tool routing"""
         try:
-            # Extract parameters
             table_name = arguments.get("table_name")
-            analysis_scope = arguments.get("analysis_scope", "comprehensive")
+            catalog_name = arguments.get("catalog_name")
+            db_name = arguments.get("db_name")
+            
+            # Delegate to atomic data quality tools
+            result = await self.data_quality_tools.get_table_basic_info(
+                table_name=table_name,
+                catalog_name=catalog_name,
+                db_name=db_name
+            )
+            
+            return result
+            
+        except Exception as e:
+            return {
+                "error": str(e),
+                "analysis_type": "table_basic_info",
+                "timestamp": datetime.now().isoformat()
+            }
+    
+    async def _analyze_columns_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """Analyze columns tool routing"""
+        try:
+            table_name = arguments.get("table_name")
+            columns = arguments.get("columns")
+            analysis_types = arguments.get("analysis_types", ["both"])
             sample_size = arguments.get("sample_size", 100000)
-            include_all_columns = arguments.get("include_all_columns", False)
-            business_rules = arguments.get("business_rules", [])
             catalog_name = arguments.get("catalog_name")
             db_name = arguments.get("db_name")
             detailed_response = arguments.get("detailed_response", False)
             
-            # Delegate to the unified data quality tools
-            result = await self.data_quality_tools.analyze_data_quality(
+            # Delegate to atomic data quality tools
+            result = await self.data_quality_tools.analyze_columns(
                 table_name=table_name,
-                analysis_scope=analysis_scope,
+                columns=columns,
+                analysis_types=analysis_types,
                 sample_size=sample_size,
-                include_all_columns=include_all_columns,
-                business_rules=business_rules,
                 catalog_name=catalog_name,
                 db_name=db_name,
                 detailed_response=detailed_response
@@ -1625,7 +1736,32 @@ No parameters required. Returns connection status, configuration, and diagnostic
         except Exception as e:
             return {
                 "error": str(e),
-                "analysis_type": "unified_data_quality",
+                "analysis_type": "columns_analysis",
+                "timestamp": datetime.now().isoformat()
+            }
+    
+    async def _analyze_table_storage_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """Analyze table storage tool routing"""
+        try:
+            table_name = arguments.get("table_name")
+            catalog_name = arguments.get("catalog_name")
+            db_name = arguments.get("db_name")
+            detailed_response = arguments.get("detailed_response", False)
+            
+            # Delegate to atomic data quality tools
+            result = await self.data_quality_tools.analyze_table_storage(
+                table_name=table_name,
+                catalog_name=catalog_name,
+                db_name=db_name,
+                detailed_response=detailed_response
+            )
+            
+            return result
+            
+        except Exception as e:
+            return {
+                "error": str(e),
+                "analysis_type": "table_storage_analysis",
                 "timestamp": datetime.now().isoformat()
             }
     
diff --git a/doris_mcp_server/utils/config.py b/doris_mcp_server/utils/config.py
index efae016..8e735da 100644
--- a/doris_mcp_server/utils/config.py
+++ b/doris_mcp_server/utils/config.py
@@ -137,6 +137,33 @@ class PerformanceConfig:
     max_response_content_size: int = 4096
 
 
+@dataclass
+class DataQualityConfig:
+    """Data quality analysis configuration"""
+
+    # Column analysis configuration
+    max_columns_per_batch: int = 20  # Maximum columns to analyze in a single batch
+    default_sample_size: int = 100000  # Default sample size for analysis
+    
+    # Sampling strategy configuration
+    small_table_threshold: int = 100000  # Tables smaller than this use full table analysis
+    medium_table_threshold: int = 1000000  # Tables smaller than this use simple LIMIT sampling
+    # Tables larger than medium_table_threshold use systematic sampling
+    
+    # Performance optimization
+    enable_batch_analysis: bool = True  # Enable batch analysis for multiple columns
+    batch_timeout: int = 300  # Timeout for batch analysis in seconds
+    
+    # Accuracy vs Performance trade-off
+    enable_fast_mode: bool = False  # Use approximate algorithms for faster results
+    fast_mode_sample_size: int = 10000  # Sample size for fast mode
+    
+    # Statistical analysis configuration
+    enable_distribution_analysis: bool = True  # Enable distribution analysis
+    histogram_bins: int = 20  # Number of bins for histogram analysis
+    percentile_levels: list[float] = field(default_factory=lambda: [0.25, 0.5, 0.75, 0.95, 0.99])  # Percentile levels to calculate
+
+
 @dataclass
 class ADBCConfig:
     """ADBC (Arrow Flight SQL) configuration"""
@@ -208,6 +235,7 @@ class DorisConfig:
     database: DatabaseConfig = field(default_factory=DatabaseConfig)
     security: SecurityConfig = field(default_factory=SecurityConfig)
     performance: PerformanceConfig = field(default_factory=PerformanceConfig)
+    data_quality: DataQualityConfig = field(default_factory=DataQualityConfig)
     logging: LoggingConfig = field(default_factory=LoggingConfig)
     monitoring: MonitoringConfig = field(default_factory=MonitoringConfig)
     adbc: ADBCConfig = field(default_factory=ADBCConfig)
@@ -404,6 +432,38 @@ class DorisConfig:
             os.getenv("ADBC_ENABLED", str(config.adbc.enabled).lower()).lower() == "true"
         )
 
+        # Data quality configuration
+        config.data_quality.max_columns_per_batch = int(
+            os.getenv("DATA_QUALITY_MAX_COLUMNS_PER_BATCH", str(config.data_quality.max_columns_per_batch))
+        )
+        config.data_quality.default_sample_size = int(
+            os.getenv("DATA_QUALITY_DEFAULT_SAMPLE_SIZE", str(config.data_quality.default_sample_size))
+        )
+        config.data_quality.small_table_threshold = int(
+            os.getenv("DATA_QUALITY_SMALL_TABLE_THRESHOLD", str(config.data_quality.small_table_threshold))
+        )
+        config.data_quality.medium_table_threshold = int(
+            os.getenv("DATA_QUALITY_MEDIUM_TABLE_THRESHOLD", str(config.data_quality.medium_table_threshold))
+        )
+        config.data_quality.enable_batch_analysis = (
+            os.getenv("DATA_QUALITY_ENABLE_BATCH_ANALYSIS", str(config.data_quality.enable_batch_analysis).lower()).lower() == "true"
+        )
+        config.data_quality.batch_timeout = int(
+            os.getenv("DATA_QUALITY_BATCH_TIMEOUT", str(config.data_quality.batch_timeout))
+        )
+        config.data_quality.enable_fast_mode = (
+            os.getenv("DATA_QUALITY_ENABLE_FAST_MODE", str(config.data_quality.enable_fast_mode).lower()).lower() == "true"
+        )
+        config.data_quality.fast_mode_sample_size = int(
+            os.getenv("DATA_QUALITY_FAST_MODE_SAMPLE_SIZE", str(config.data_quality.fast_mode_sample_size))
+        )
+        config.data_quality.enable_distribution_analysis = (
+            os.getenv("DATA_QUALITY_ENABLE_DISTRIBUTION_ANALYSIS", str(config.data_quality.enable_distribution_analysis).lower()).lower() == "true"
+        )
+        config.data_quality.histogram_bins = int(
+            os.getenv("DATA_QUALITY_HISTOGRAM_BINS", str(config.data_quality.histogram_bins))
+        )
+
         # Server configuration
         config.server_name = os.getenv("SERVER_NAME", config.server_name)
         config.server_version = os.getenv("SERVER_VERSION", config.server_version)
@@ -443,6 +503,13 @@ class DorisConfig:
                 if hasattr(config.performance, key):
                     setattr(config.performance, key, value)
 
+        # Update data quality configuration
+        if "data_quality" in config_data:
+            dq_config = config_data["data_quality"]
+            for key, value in dq_config.items():
+                if hasattr(config.data_quality, key):
+                    setattr(config.data_quality, key, value)
+
         # Update logging configuration
         if "logging" in config_data:
             log_config = config_data["logging"]
@@ -516,6 +583,19 @@ class DorisConfig:
                 "idle_timeout": self.performance.idle_timeout,
                 "max_response_content_size": self.performance.max_response_content_size,
             },
+            "data_quality": {
+                "max_columns_per_batch": self.data_quality.max_columns_per_batch,
+                "default_sample_size": self.data_quality.default_sample_size,
+                "small_table_threshold": self.data_quality.small_table_threshold,
+                "medium_table_threshold": self.data_quality.medium_table_threshold,
+                "enable_batch_analysis": self.data_quality.enable_batch_analysis,
+                "batch_timeout": self.data_quality.batch_timeout,
+                "enable_fast_mode": self.data_quality.enable_fast_mode,
+                "fast_mode_sample_size": self.data_quality.fast_mode_sample_size,
+                "enable_distribution_analysis": self.data_quality.enable_distribution_analysis,
+                "histogram_bins": self.data_quality.histogram_bins,
+                "percentile_levels": self.data_quality.percentile_levels,
+            },
             "logging": {
                 "level": self.logging.level,
                 "format": self.logging.format,
@@ -602,6 +682,31 @@ class DorisConfig:
         if self.performance.query_timeout <= 0:
             errors.append("Query timeout must be greater than 0")
 
+        # Validate data quality configuration
+        if self.data_quality.max_columns_per_batch <= 0:
+            errors.append("Max columns per batch must be greater than 0")
+
+        if self.data_quality.default_sample_size <= 0:
+            errors.append("Default sample size must be greater than 0")
+
+        if self.data_quality.small_table_threshold <= 0:
+            errors.append("Small table threshold must be greater than 0")
+
+        if self.data_quality.medium_table_threshold <= 0:
+            errors.append("Medium table threshold must be greater than 0")
+
+        if self.data_quality.small_table_threshold >= self.data_quality.medium_table_threshold:
+            errors.append("Small table threshold must be less than medium table threshold")
+
+        if self.data_quality.batch_timeout <= 0:
+            errors.append("Batch timeout must be greater than 0")
+
+        if self.data_quality.fast_mode_sample_size <= 0:
+            errors.append("Fast mode sample size must be greater than 0")
+
+        if self.data_quality.histogram_bins <= 0:
+            errors.append("Histogram bins must be greater than 0")
+
         # Validate logging configuration
         if self.logging.level not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
             errors.append("Log level must be one of DEBUG, INFO, WARNING, ERROR, or CRITICAL")
diff --git a/doris_mcp_server/utils/data_governance_tools.py b/doris_mcp_server/utils/data_governance_tools.py
index a413dc5..4589c34 100644
--- a/doris_mcp_server/utils/data_governance_tools.py
+++ b/doris_mcp_server/utils/data_governance_tools.py
@@ -59,29 +59,57 @@ class DataGovernanceTools:
         """
         try:
             start_time = time.time()
+            
+            # 🚀 PROGRESS: Initialize column lineage tracing
+            logger.info("=" * 60)
+            logger.info(f"🔍 Starting Column Lineage Tracing")
+            logger.info(f"📊 Target: {table_name}.{column_name}")
+            logger.info(f"🎯 Trace depth: {depth}")
+            logger.info("=" * 60)
+            
             connection = await self.connection_manager.get_connection("query")
             
             full_table_name = self._build_full_table_name(table_name, catalog_name, db_name)
             target_column = f"{full_table_name}.{column_name}"
             
-            # 1. Verify target column exists
+            logger.info(f"📝 Full target: {target_column}")
+            
+            # 🚀 PROGRESS: Step 1 - Verify target column exists
+            logger.info("🔍 Step 1/4: Verifying target column exists...")
+            verify_start = time.time()
             if not await self._verify_column_exists(connection, full_table_name, column_name):
+                logger.error(f"❌ Column {column_name} not found in table {full_table_name}")
                 return {"error": f"Column {column_name} not found in table {full_table_name}"}
             
-            # 2. Analyze SQL logs to get lineage relationships
+            verify_time = time.time() - verify_start
+            logger.info(f"✅ Column verified in {verify_time:.2f}s")
+            
+            # 🚀 PROGRESS: Step 2 - Analyze SQL logs for lineage relationships
+            logger.info(f"📊 Step 2/4: Analyzing SQL logs for lineage (depth={depth})...")
+            lineage_start = time.time()
             source_chain = await self._analyze_sql_logs_for_lineage(
                 connection, full_table_name, column_name, depth
             )
+            lineage_time = time.time() - lineage_start
+            logger.info(f"✅ Found {len(source_chain)} lineage relationships in {lineage_time:.2f}s")
             
-            # 3. Analyze downstream usage
+            # 🚀 PROGRESS: Step 3 - Analyze downstream usage
+            logger.info("⬇️ Step 3/4: Analyzing downstream column usage...")
+            downstream_start = time.time()
             downstream_usage = await self._analyze_downstream_column_usage(
                 connection, full_table_name, column_name
             )
+            downstream_time = time.time() - downstream_start
+            logger.info(f"✅ Found {len(downstream_usage)} downstream usages in {downstream_time:.2f}s")
             
-            # 4. Analyze field transformation rules
+            # 🚀 PROGRESS: Step 4 - Extract transformation rules
+            logger.info("🔄 Step 4/4: Extracting transformation rules...")
+            transform_start = time.time()
             transformation_rules = await self._extract_transformation_rules(
                 connection, full_table_name, column_name
             )
+            transform_time = time.time() - transform_start
+            logger.info(f"✅ Found {len(transformation_rules)} transformation rules in {transform_time:.2f}s")
             
             execution_time = time.time() - start_time
             
diff --git a/doris_mcp_server/utils/data_quality_tools.py b/doris_mcp_server/utils/data_quality_tools.py
index 7c71ecc..9058f4f 100644
--- a/doris_mcp_server/utils/data_quality_tools.py
+++ b/doris_mcp_server/utils/data_quality_tools.py
@@ -15,8 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 """
-Unified Data Quality Tools Module
-Provides comprehensive data quality analysis combining completeness and distribution analysis
+Atomic Data Quality Tools Module
+Provides atomic data quality analysis tools for flexible composition
 """
 
 import asyncio
@@ -30,856 +30,289 @@ from collections import Counter, defaultdict
 
 from .db import DorisConnectionManager
 from .logger import get_logger
+from .config import DorisConfig
 
 logger = get_logger(__name__)
 
 
 class DataQualityTools:
-    """Unified data quality analysis tools"""
+    """Atomic data quality analysis tools"""
     
-    def __init__(self, connection_manager: DorisConnectionManager):
+    def __init__(self, connection_manager: DorisConnectionManager, config: DorisConfig = None):
         self.connection_manager = connection_manager
-        logger.info("DataQualityTools initialized")
+        self.config = config or DorisConfig.from_env()
+        logger.info("DataQualityTools initialized with atomic tools")
     
-    async def analyze_data_quality(
+    async def get_table_basic_info(
         self, 
         table_name: str,
-        analysis_scope: str = "comprehensive",
-        sample_size: int = 100000,
-        include_all_columns: bool = False,
-        business_rules: Optional[List[Dict]] = None,
         catalog_name: Optional[str] = None,
-        db_name: Optional[str] = None,
-        detailed_response: bool = False
+        db_name: Optional[str] = None
     ) -> Dict[str, Any]:
         """
-        Unified data quality analysis tool
+        Get basic table information
         
         Args:
             table_name: Table name
-            analysis_scope: Analysis scope ("completeness", "distribution", "comprehensive")
-            sample_size: Sample size
-            include_all_columns: Whether to analyze all columns
-            business_rules: Business rules list
-            catalog_name: Catalog name
-            db_name: Database name
-            detailed_response: Whether to return detailed response (default: False for optimized response)
+            catalog_name: Catalog name (optional)
+            db_name: Database name (optional)
         
         Returns:
-            Unified data quality analysis result
+            Dictionary containing basic table information:
+            - table_name: Full table name
+            - row_count: Number of rows
+            - column_count: Number of columns
+            - columns_info: List of column information
+            - partitions_info: Partition information (if any)
+            - table_size: Table size information
         """
         try:
             start_time = time.time()
-            connection = await self.connection_manager.get_connection("query")
+            logger.info(f"🔍 Getting basic info for table: {table_name}")
             
-            # Build full table name
-            full_table_name = self._build_full_table_name(table_name, catalog_name, db_name)
-            
-            # Get table basic info
-            table_info = await self._get_table_basic_info(connection, full_table_name)
-            if not table_info:
-                return {"error": f"Table {full_table_name} not found"}
-            
-            # Get column info
-            columns_info = await self._get_table_columns_info(connection, table_name, catalog_name, db_name)
-            
-            # Determine sampling strategy
-            sampling_info = await self._determine_sampling_strategy(
-                connection, full_table_name, table_info["row_count"], sample_size
-            )
-            
-            # Select analysis columns
-            target_columns = self._select_analysis_columns(columns_info, include_all_columns)
-            
-            # Execute analysis
-            results = {
-                "table_name": full_table_name,
-                "analysis_timestamp": datetime.now().isoformat(),
-                "analysis_scope": analysis_scope,
-                "table_summary": {
-                    "total_rows": table_info["row_count"],
-                    "total_columns": len(columns_info),
-                    "columns_analyzed": len(target_columns),
-                    "sample_info": sampling_info
+            async with self.connection_manager.get_connection_context("query") as connection:
+                # Build full table name
+                full_table_name = self._build_full_table_name(table_name, catalog_name, db_name)
+                logger.info(f"📝 Full table name: {full_table_name}")
+                
+                # Get basic table information
+                table_info = await self._get_table_basic_info(connection, full_table_name)
+                if not table_info:
+                    return {"error": f"Table {full_table_name} not found"}
+                
+                # Get column information
+                columns_info = await self._get_table_columns_info(connection, table_name, catalog_name, db_name)
+                
+                # Get partition information
+                partitions_info = await self._get_table_partitions(connection, table_name, db_name)
+                
+                # Get table size information
+                size_info = await self._get_table_size_info(connection, full_table_name)
+                
+                execution_time = time.time() - start_time
+                
+                result = {
+                    "table_name": full_table_name,
+                    "analysis_timestamp": datetime.now().isoformat(),
+                    "row_count": table_info["row_count"],
+                    "column_count": len(columns_info),
+                    "columns_info": columns_info,
+                    "partitions_info": {
+                        "partition_count": len(partitions_info),
+                        "partitions": partitions_info
+                    },
+                    "table_size": size_info,
+                    "execution_time_seconds": round(execution_time, 3)
                 }
-            }
-            
-            # Execute analysis tasks in parallel
-            tasks = []
-            
-            if analysis_scope in ["completeness", "comprehensive"]:
-                tasks.append(self._analyze_completeness_enhanced(
-                    connection, full_table_name, target_columns, business_rules or [], sampling_info
-                ))
-            
-            if analysis_scope in ["distribution", "comprehensive"]:
-                tasks.append(self._analyze_distribution_enhanced(
-                    connection, full_table_name, target_columns, sampling_info, table_name, catalog_name, db_name, detailed_response
-                ))
-            
-            # Wait for all tasks to complete
-            analysis_results = await asyncio.gather(*tasks)
-            
-            # Assemble results
-            if analysis_scope in ["completeness", "comprehensive"]:
-                completeness_idx = 0 if analysis_scope == "completeness" else 0
-                results["completeness_analysis"] = analysis_results[completeness_idx]
-            
-            if analysis_scope in ["distribution", "comprehensive"]:
-                distribution_idx = 0 if analysis_scope == "distribution" else (1 if analysis_scope == "comprehensive" else 0)
-                results["distribution_analysis"] = analysis_results[distribution_idx]
-            
-            # Generate unified insights
-            if analysis_scope == "comprehensive":
-                results["unified_insights"] = await self._generate_unified_insights(
-                    results.get("completeness_analysis", {}),
-                    results.get("distribution_analysis", {})
-                )
-            
-            execution_time = time.time() - start_time
-            results["execution_time_seconds"] = round(execution_time, 3)
-            
-            return results
-            
+                
+                logger.info(f"✅ Table info retrieved - Rows: {table_info['row_count']:,}, Columns: {len(columns_info)}, Partitions: {len(partitions_info)}")
+                return result
+                
         except Exception as e:
-            logger.error(f"Data quality analysis failed for {table_name}: {str(e)}")
-            return {
-                "error": str(e),
-                "table_name": table_name,
-                "analysis_timestamp": datetime.now().isoformat()
-            }
-    
-    # ==== Backward Compatible Methods ====
-    
-    async def analyze_data_completeness(
-        self, 
-        table_name: str, 
-        business_rules: Optional[List[Dict]] = None,
-        catalog_name: Optional[str] = None,
-        db_name: Optional[str] = None
-    ) -> Dict[str, Any]:
-        """Backward compatible: data completeness analysis"""
-        return await self.analyze_data_quality(
-            table_name=table_name,
-            analysis_scope="completeness",
-            business_rules=business_rules,
-            catalog_name=catalog_name,
-            db_name=db_name
-        )
-    
-    async def analyze_table_distribution(
-        self, 
-        table_name: str,
-        sample_size: int = 100000,
-        include_all_columns: bool = False,
-        catalog_name: Optional[str] = None,
-        db_name: Optional[str] = None
-    ) -> Dict[str, Any]:
-        """Backward compatible: table distribution analysis"""
-        return await self.analyze_data_quality(
-            table_name=table_name,
-            analysis_scope="distribution",
-            sample_size=sample_size,
-            include_all_columns=include_all_columns,
-            catalog_name=catalog_name,
-            db_name=db_name
-        )
-    
-    # ==== Core Analysis Methods ====
-    
-    async def _analyze_completeness_enhanced(
-        self, 
-        connection, 
-        table_name: str, 
-        columns_info: List[Dict], 
-        business_rules: List[Dict],
-        sampling_info: Dict
-    ) -> Dict[str, Any]:
-        """Enhanced completeness analysis"""
-        try:
-            # Analyze column completeness
-            column_completeness = await self._analyze_column_completeness_unified(
-                connection, table_name, columns_info, sampling_info
-            )
-            
-            # Business rule compliance check
-            business_rule_compliance = {}
-            if business_rules:
-                business_rule_compliance = await self._check_business_rule_compliance(
-                    connection, table_name, business_rules, sampling_info
-                )
-            
-            # Data integrity issue detection
-            integrity_issues = await self._detect_data_integrity_issues(
-                connection, table_name, columns_info, sampling_info
-            )
-            
-            # Calculate overall completeness score
-            overall_score = self._calculate_completeness_score(column_completeness, business_rule_compliance)
-            
-            return {
-                "overall_completeness_score": overall_score,
-                "column_completeness": column_completeness,
-                "business_rule_compliance": business_rule_compliance,
-                "integrity_issues": integrity_issues,
-                "recommendations": self._generate_completeness_recommendations(column_completeness, integrity_issues)
-            }
-            
-        except Exception as e:
-            logger.error(f"Completeness analysis failed: {str(e)}")
+            logger.error(f"❌ Failed to get table basic info: {str(e)}")
             return {"error": str(e)}
     
-    async def _analyze_distribution_enhanced(
+    async def analyze_columns(
         self, 
-        connection, 
-        table_name: str, 
-        columns_info: List[Dict], 
-        sampling_info: Dict,
-        base_table_name: str,
+        table_name: str,
+        columns: List[str],
+        analysis_types: List[str] = ["both"],
+        sample_size: int = 100000,
         catalog_name: Optional[str] = None,
         db_name: Optional[str] = None,
         detailed_response: bool = False
     ) -> Dict[str, Any]:
-        """Enhanced distribution analysis including physical data distribution"""
+        """
+        Analyze completeness and distribution of specified columns
+        
+        Args:
+            table_name: Table name
+            columns: List of column names to analyze
+            analysis_types: List of analysis types, options: ["completeness", "distribution", "both"]
+            sample_size: Sample size
+            catalog_name: Catalog name (optional)
+            db_name: Database name (optional)
+            detailed_response: Whether to return detailed response
+        
+        Returns:
+            Dictionary containing column analysis results:
+            - table_name: Table name
+            - columns_analyzed: Number of columns analyzed
+            - completeness_analysis: Completeness analysis results (if requested)
+            - distribution_analysis: Distribution analysis results (if requested)
+            - sampling_info: Sampling information
+        """
         try:
             start_time = time.time()
+            logger.info(f"🔍 Analyzing columns for table: {table_name}")
+            logger.info(f"📊 Columns to analyze: {columns}")
+            logger.info(f"🎯 Analysis types: {analysis_types}")
+            logger.info(f"📏 Sample size: {sample_size:,}")
             
-            # === 1. Statistical Data Distribution Analysis ===
-            statistical_analysis = await self._analyze_statistical_distribution(
-                connection, table_name, columns_info, sampling_info
-            )
-            
-            # === 2. Physical Data Distribution Analysis ===
-            physical_analysis = await self._analyze_physical_distribution(
-                connection, base_table_name, catalog_name, db_name, detailed_response
-            )
-            
-            # === 3. Storage Distribution Analysis ===
-            storage_analysis = await self._analyze_storage_distribution(
-                connection, table_name, sampling_info
-            )
-            
-            # === 4. Generate comprehensive insights ===
-            distribution_insights = await self._generate_distribution_insights(
-                statistical_analysis, physical_analysis, storage_analysis
-            )
-            
-            execution_time = time.time() - start_time
-            
-            return {
-                "statistical_distribution": statistical_analysis,
-                "physical_distribution": physical_analysis,
-                "storage_distribution": storage_analysis,
-                "distribution_insights": distribution_insights,
-                "analysis_summary": {
-                    "execution_time_seconds": round(execution_time, 3),
-                    "analysis_timestamp": datetime.now().isoformat()
+            async with self.connection_manager.get_connection_context("query") as connection:
+                # Build full table name
+                full_table_name = self._build_full_table_name(table_name, catalog_name, db_name)
+                
+                # Get basic table information
+                table_info = await self._get_table_basic_info(connection, full_table_name)
+                if not table_info:
+                    return {"error": f"Table {full_table_name} not found"}
+                
+                # Get column information
+                all_columns_info = await self._get_table_columns_info(connection, table_name, catalog_name, db_name)
+                
+                # Filter specified columns
+                target_columns_info = [col for col in all_columns_info if col["column_name"] in columns]
+                if not target_columns_info:
+                    return {"error": f"None of the specified columns found in table {full_table_name}"}
+                
+                # Check column count limit
+                max_columns = self.config.data_quality.max_columns_per_batch
+                if len(target_columns_info) > max_columns:
+                    logger.warning(f"⚠️ Column count ({len(target_columns_info)}) exceeds batch limit ({max_columns}), processing first {max_columns} columns")
+                    target_columns_info = target_columns_info[:max_columns]
+                
+                # Determine sampling strategy (optimized version)
+                sampling_info = await self._determine_optimized_sampling_strategy(
+                    connection, full_table_name, table_info["row_count"], sample_size
+                )
+                
+                logger.info(f"📊 Using {sampling_info['sampling_method']} sampling: {sampling_info['sample_size']:,} rows")
+                
+                result = {
+                    "table_name": full_table_name,
+                    "analysis_timestamp": datetime.now().isoformat(),
+                    "columns_analyzed": len(target_columns_info),
+                    "analysis_types": analysis_types,
+                    "sampling_info": sampling_info
                 }
-            }
-            
+                
+                # Batch analysis (optimized version)
+                if self.config.data_quality.enable_batch_analysis:
+                    logger.info("🚀 Using batch analysis for improved performance...")
+                    batch_results = await self._analyze_columns_batch(
+                        connection, full_table_name, target_columns_info, sampling_info, analysis_types, detailed_response
+                    )
+                    result.update(batch_results)
+                else:
+                    # Execute completeness analysis
+                    if "completeness" in analysis_types or "both" in analysis_types:
+                        logger.info("🧩 Executing completeness analysis...")
+                        completeness_start = time.time()
+                        result["completeness_analysis"] = await self._analyze_completeness(
+                            connection, full_table_name, target_columns_info, sampling_info
+                        )
+                        completeness_time = time.time() - completeness_start
+                        logger.info(f"✅ Completeness analysis completed in {completeness_time:.2f}s")
+                    
+                    # Execute distribution analysis
+                    if "distribution" in analysis_types or "both" in analysis_types:
+                        logger.info("📈 Executing distribution analysis...")
+                        distribution_start = time.time()
+                        result["distribution_analysis"] = await self._analyze_distribution(
+                            connection, full_table_name, target_columns_info, sampling_info, detailed_response
+                        )
+                        distribution_time = time.time() - distribution_start
+                        logger.info(f"✅ Distribution analysis completed in {distribution_time:.2f}s")
+                
+                execution_time = time.time() - start_time
+                result["execution_time_seconds"] = round(execution_time, 3)
+                
+                logger.info(f"🎉 Column analysis completed in {execution_time:.2f}s")
+                return result
+                
         except Exception as e:
-            logger.error(f"Distribution analysis failed: {str(e)}")
+            logger.error(f"❌ Failed to analyze columns: {str(e)}")
             return {"error": str(e)}
     
-    async def _generate_unified_insights(
+    async def analyze_table_storage(
         self, 
-        completeness_analysis: Dict, 
-        distribution_analysis: Dict
-    ) -> Dict[str, Any]:
-        """Generate unified data quality insights"""
-        try:
-            # Comprehensive data quality scoring
-            completeness_score = completeness_analysis.get("overall_completeness_score", 0.0)
-            distribution_quality_score = distribution_analysis.get("quality_insights", {}).get("overall_distribution_quality_score", 0.0)
-            
-            # Weighted comprehensive score
-            overall_data_quality_score = (completeness_score * 0.6 + distribution_quality_score * 0.4)
-            
-            # Collect all issues
-            critical_issues = []
-            
-            # Completeness critical issues
-            completeness_issues = completeness_analysis.get("integrity_issues", [])
-            for issue in completeness_issues:
-                if issue.get("severity") == "high":
-                    critical_issues.append({
-                        "category": "completeness",
-                        "type": issue["type"],
-                        "description": issue["description"],
-                        "severity": issue["severity"]
-                    })
-            
-            # Distribution quality critical issues
-            distribution_issues = distribution_analysis.get("quality_insights", {}).get("quality_issues", [])
-            for issue in distribution_issues:
-                if issue.get("severity") in ["high", "critical"]:
-                    critical_issues.append({
-                        "category": "distribution",
-                        "type": issue["issue_type"],
-                        "description": issue["description"],
-                        "severity": issue["severity"]
-                    })
-            
-            # Generate unified recommendations
-            unified_recommendations = self._generate_unified_recommendations(
-                completeness_analysis, distribution_analysis, overall_data_quality_score
-            )
-            
-            # Quality grade assessment
-            quality_grade = self._assess_quality_grade(overall_data_quality_score)
-            
-            return {
-                "overall_data_quality_score": round(overall_data_quality_score, 3),
-                "quality_grade": quality_grade,
-                "component_scores": {
-                    "completeness_score": completeness_score,
-                    "distribution_quality_score": distribution_quality_score
-                },
-                "critical_issues": critical_issues,
-                "quality_recommendations": unified_recommendations,
-                "analysis_summary": {
-                    "total_issues_found": len(critical_issues),
-                    "completeness_issues": len([i for i in critical_issues if i["category"] == "completeness"]),
-                    "distribution_issues": len([i for i in critical_issues if i["category"] == "distribution"]),
-                    "recommendations_count": len(unified_recommendations)
-                }
-            }
-            
-        except Exception as e:
-            logger.error(f"Failed to generate unified insights: {str(e)}")
-            return {"error": str(e)}
-    
-    # ==== Physical Distribution Analysis Methods ====
-    
-    async def _analyze_physical_distribution(
-        self, 
-        connection, 
         table_name: str,
         catalog_name: Optional[str] = None,
         db_name: Optional[str] = None,
         detailed_response: bool = False
     ) -> Dict[str, Any]:
-        """Analyze physical data distribution across cluster nodes - returns aggregated metrics only"""
+        """
+        Analyze table's physical distribution and storage information
+        
+        Args:
+            table_name: Table name
+            catalog_name: Catalog name (optional)
+            db_name: Database name (optional)
+            detailed_response: Whether to return detailed response
+        
+        Returns:
+            Dictionary containing physical distribution and storage information:
+            - table_name: Table name
+            - physical_distribution: Physical distribution information
+            - storage_info: Storage information
+            - partition_distribution: Partition distribution information
+        """
         try:
-            # Get partition information
-            partitions_info = await self._get_table_partitions(connection, table_name, db_name)
+            start_time = time.time()
+            logger.info(f"🔍 Analyzing storage for table: {table_name}")
             
-            if not partitions_info:
-                return {
-                    "analysis_type": "physical_distribution",
-                    "partition_count": 0,
-                    "error": "No partition information available"
-                }
-            
-            # Aggregate all tablet information
-            all_tablets = []
-            total_partitions = len(partitions_info)
-            partition_summary = self._summarize_partitions(partitions_info)
-            
-            # Collect tablet data from all partitions
-            for partition in partitions_info:
-                partition_name = partition["PartitionName"]
-                tablets = await self._get_partition_tablets(connection, table_name, partition_name, db_name)
-                all_tablets.extend(tablets)
-            
-            if not all_tablets:
-                return {
-                    "analysis_type": "physical_distribution",
-                    "partition_count": total_partitions,
-                    "partition_summary": partition_summary,
-                    "tablet_count": 0,
-                    "error": "No tablet information available"
-                }
-            
-            # Aggregate node distribution metrics only (no raw data)
-            node_distribution = defaultdict(lambda: {"tablet_count": 0, "data_size": 0, "partition_count": 0})
-            backend_to_partitions = defaultdict(set)
-            
-            for tablet in all_tablets:
-                backend_id = tablet.get("BackendId", "unknown")
-                data_size = int(tablet.get("DataSize", 0))
-                partition_name = tablet.get("PartitionName", "default")
+            async with self.connection_manager.get_connection_context("query") as connection:
+                # Build full table name
+                full_table_name = self._build_full_table_name(table_name, catalog_name, db_name)
                 
-                node_distribution[backend_id]["tablet_count"] += 1
-                node_distribution[backend_id]["data_size"] += data_size
-                backend_to_partitions[backend_id].add(partition_name)
-            
-            # Add partition count per backend
-            for backend_id in node_distribution:
-                node_distribution[backend_id]["partition_count"] = len(backend_to_partitions[backend_id])
-            
-            # Calculate aggregated distribution metrics
-            distribution_metrics = self._calculate_distribution_metrics_aggregated(dict(node_distribution))
-            balance_analysis = self._analyze_data_balance(node_distribution)
-            
-            # Base response (always included)
-            result = {
-                "analysis_type": "physical_distribution",
-                "partition_count": total_partitions,
-                "tablet_count": len(all_tablets),
-                "backend_node_count": len(node_distribution),
-                "distribution_summary": {
-                    "backends": list(node_distribution.keys()),
-                    "tablet_balance_score": distribution_metrics["tablet_balance_score"],
-                    "data_balance_score": distribution_metrics["data_balance_score"],
-                    "overall_balance_score": distribution_metrics["overall_balance_score"],
-                    "total_data_size_bytes": distribution_metrics["total_data_size"],
-                    "avg_tablets_per_backend": distribution_metrics["avg_tablets_per_backend"],
-                    "max_tablets_per_backend": distribution_metrics["max_tablets_per_backend"],
-                    "min_tablets_per_backend": distribution_metrics["min_tablets_per_backend"]
-                },
-                "balance_status": balance_analysis["status"],
-                "risk_level": self._assess_distribution_risk(distribution_metrics),
-                "recommendations": balance_analysis["recommendations"][:3]  # Limit to top 3 recommendations
-            }
-            
-            # Add detailed information only if requested
-            if detailed_response:
-                result.update({
-                    "partition_details": partitions_info,  # Full partition information
-                    "tablet_details": all_tablets,  # Full tablet information
-                    "node_distribution_details": dict(node_distribution),  # Detailed node distribution
-                    "partition_summary": partition_summary,  # Extended partition summary
-                    "full_balance_analysis": balance_analysis,  # Complete balance analysis
-                    "full_recommendations": balance_analysis["recommendations"]  # All recommendations
-                })
-            else:
-                # Only include summary information for optimized response
-                result["partition_summary"] = partition_summary
-            
-            return result
-            
-        except Exception as e:
-            logger.error(f"Physical distribution analysis failed: {str(e)}")
-            return {
-                "analysis_type": "physical_distribution",
-                "error": f"Analysis failed: {str(e)}"
-            }
-    
-    async def _get_table_partitions(self, connection, table_name: str, db_name: Optional[str] = None) -> List[Dict]:
-        """Get table partition information using SHOW PARTITIONS"""
-        try:
-            if db_name:
-                sql = f"SHOW PARTITIONS FROM `{db_name}`.`{table_name}`"
-            else:
-                sql = f"SHOW PARTITIONS FROM `{table_name}`"
-            
-            result = await connection.execute(sql)
-            return result.data if result.data else []
-            
-        except Exception as e:
-            logger.warning(f"Failed to get partitions for {table_name}: {str(e)}")
-            return []
-    
-    async def _get_partition_tablets(self, connection, table_name: str, partition_name: str, db_name: Optional[str] = None) -> List[Dict]:
-        """Get tablet information for a specific partition using SHOW TABLETS"""
-        try:
-            # First try with partition specification (for partitioned tables)
-            if db_name:
-                sql = f"SHOW TABLETS FROM `{db_name}`.`{table_name}` PARTITION(`{partition_name}`)"
-            else:
-                sql = f"SHOW TABLETS FROM `{table_name}` PARTITION(`{partition_name}`)"
-            
-            result = await connection.execute(sql)
-            return result.data if result.data else []
-            
-        except Exception as e:
-            logger.warning(f"Failed to get tablets for {table_name}.{partition_name}: {str(e)}")
-            # Try alternative approach for non-partitioned tables
-            try:
-                if db_name:
-                    sql = f"SHOW TABLETS FROM `{db_name}`.`{table_name}`"
-                else:
-                    sql = f"SHOW TABLETS FROM `{table_name}`"
+                # Get basic table information
+                table_info = await self._get_table_basic_info(connection, full_table_name)
+                if not table_info:
+                    return {"error": f"Table {full_table_name} not found"}
                 
-                result = await connection.execute(sql)
-                return result.data if result.data else []
-            except Exception as e2:
-                logger.warning(f"Alternative tablet query also failed: {str(e2)}")
-                return []
-    
-    def _analyze_tablets_backend_distribution(self, tablets: List[Dict]) -> Dict[str, Any]:
-        """Analyze how tablets are distributed across backend nodes"""
-        backend_distribution = defaultdict(int)
-        total_tablets = len(tablets)
-        
-        for tablet in tablets:
-            backend_id = tablet.get("BackendId", "unknown")
-            backend_distribution[backend_id] += 1
-        
-        if total_tablets == 0:
-            return {"backend_count": 0, "distribution": {}, "balance_score": 1.0}
-        
-        # Calculate balance score (1.0 = perfect balance, 0.0 = completely unbalanced)
-        if len(backend_distribution) <= 1:
-            balance_score = 1.0
-        else:
-            tablet_counts = list(backend_distribution.values())
-            avg_tablets = statistics.mean(tablet_counts)
-            variance = statistics.variance(tablet_counts) if len(tablet_counts) > 1 else 0
-            balance_score = max(0.0, 1.0 - (variance / (avg_tablets ** 2)))
-        
-        return {
-            "backend_count": len(backend_distribution),
-            "distribution": dict(backend_distribution),
-            "balance_score": round(balance_score, 3),
-            "total_tablets": total_tablets
-        }
-    
-    def _calculate_distribution_metrics(self, node_distribution: Dict, tablet_distribution: Dict) -> Dict[str, Any]:
-        """Calculate overall distribution metrics (legacy method for compatibility)"""
-        if not node_distribution:
-            return {"total_nodes": 0, "balance_score": 1.0}
-        
-        total_tablets = sum(node["tablet_count"] for node in node_distribution.values())
-        total_data_size = sum(node["data_size"] for node in node_distribution.values())
-        
-        # Calculate tablet balance
-        tablet_counts = [node["tablet_count"] for node in node_distribution.values()]
-        tablet_balance_score = self._calculate_balance_score(tablet_counts)
-        
-        # Calculate data size balance
-        data_sizes = [node["data_size"] for node in node_distribution.values()]
-        data_balance_score = self._calculate_balance_score(data_sizes)
-        
-        return {
-            "total_nodes": len(node_distribution),
-            "total_tablets": total_tablets,
-            "total_data_size": total_data_size,
-            "tablet_balance_score": tablet_balance_score,
-            "data_balance_score": data_balance_score,
-            "overall_balance_score": round((tablet_balance_score + data_balance_score) / 2, 3)
-        }
-    
-    def _calculate_distribution_metrics_aggregated(self, node_distribution: Dict) -> Dict[str, Any]:
-        """Calculate aggregated distribution metrics for optimized response"""
-        if not node_distribution:
-            return {
-                "total_nodes": 0,
-                "tablet_balance_score": 1.0,
-                "data_balance_score": 1.0,
-                "overall_balance_score": 1.0,
-                "total_data_size": 0,
-                "avg_tablets_per_backend": 0,
-                "max_tablets_per_backend": 0,
-                "min_tablets_per_backend": 0
-            }
-        
-        # Extract metrics
-        tablet_counts = [node["tablet_count"] for node in node_distribution.values()]
-        data_sizes = [node["data_size"] for node in node_distribution.values()]
-        
-        # Calculate balance scores
-        tablet_balance_score = self._calculate_balance_score(tablet_counts)
-        data_balance_score = self._calculate_balance_score(data_sizes)
-        overall_balance_score = round((tablet_balance_score + data_balance_score) / 2, 3)
-        
-        # Aggregated statistics
-        total_tablets = sum(tablet_counts)
-        total_data_size = sum(data_sizes)
-        avg_tablets_per_backend = round(total_tablets / len(node_distribution), 1)
-        max_tablets_per_backend = max(tablet_counts) if tablet_counts else 0
-        min_tablets_per_backend = min(tablet_counts) if tablet_counts else 0
-        
-        return {
-            "total_nodes": len(node_distribution),
-            "tablet_balance_score": tablet_balance_score,
-            "data_balance_score": data_balance_score,
-            "overall_balance_score": overall_balance_score,
-            "total_data_size": total_data_size,
-            "avg_tablets_per_backend": avg_tablets_per_backend,
-            "max_tablets_per_backend": max_tablets_per_backend,
-            "min_tablets_per_backend": min_tablets_per_backend
-        }
-    
-    def _calculate_balance_score(self, values: List[float]) -> float:
-        """Calculate balance score for a list of values"""
-        if not values or len(values) <= 1:
-            return 1.0
-        
-        avg_value = statistics.mean(values)
-        if avg_value == 0:
-            return 1.0
-        
-        variance = statistics.variance(values)
-        balance_score = max(0.0, 1.0 - (variance / (avg_value ** 2)))
-        return round(balance_score, 3)
-    
-    def _analyze_data_balance(self, node_distribution: Dict) -> Dict[str, Any]:
-        """Analyze data balance across nodes and provide recommendations"""
-        if not node_distribution:
-            return {"status": "no_data", "recommendations": []}
-        
-        tablet_counts = [node["tablet_count"] for node in node_distribution.values()]
-        data_sizes = [node["data_size"] for node in node_distribution.values()]
-        
-        tablet_balance = self._calculate_balance_score(tablet_counts)
-        data_balance = self._calculate_balance_score(data_sizes)
-        
-        recommendations = []
-        issues = []
-        
-        if tablet_balance < 0.8:
-            issues.append("uneven_tablet_distribution")
-            recommendations.append({
-                "type": "rebalance_tablets",
-                "priority": "medium",
-                "description": "Tablet distribution is uneven across nodes",
-                "action": "Consider rebalancing tablets or adjusting bucketing strategy"
-            })
-        
-        if data_balance < 0.7:
-            issues.append("uneven_data_distribution")
-            recommendations.append({
-                "type": "rebalance_data",
-                "priority": "high",
-                "description": "Data size distribution is significantly uneven",
-                "action": "Review partitioning strategy and consider data rebalancing"
-            })
-        
-        # Check for hot spots (nodes with significantly more data)
-        if data_sizes:
-            max_data = max(data_sizes)
-            avg_data = statistics.mean(data_sizes)
-            if max_data > avg_data * 2:
-                issues.append("data_hotspot")
-                recommendations.append({
-                    "type": "hotspot_mitigation",
-                    "priority": "high",
-                    "description": "Detected potential data hotspots",
-                    "action": "Investigate nodes with excessive data and redistribute load"
-                })
-        
-        return {
-            "tablet_balance_score": tablet_balance,
-            "data_balance_score": data_balance,
-            "issues": issues,
-            "recommendations": recommendations,
-            "status": "balanced" if tablet_balance > 0.8 and data_balance > 0.8 else "unbalanced"
-        }
-    
-    def _summarize_partitions(self, partitions: List[Dict]) -> Dict[str, Any]:
-        """Summarize partition information without returning full details"""
-        if not partitions:
-            return {"partition_count": 0}
-        
-        # Extract key metrics only
-        partition_types = set()
-        storage_mediums = set()
-        total_data_size = 0
-        bucket_counts = []
-        
-        for partition in partitions:
-            # Get partition type (Range, List, etc.)
-            partition_type = partition.get("Type", "Unknown")
-            partition_types.add(partition_type)
-            
-            # Get storage medium
-            storage_medium = partition.get("StorageMedium", "Unknown")
-            storage_mediums.add(storage_medium)
-            
-            # Get data size (if available)
-            data_size = partition.get("DataSize", 0)
-            if isinstance(data_size, (int, float)):
-                total_data_size += data_size
-            
-            # Get bucket count
-            buckets = partition.get("Buckets", 0)
-            if isinstance(buckets, int):
-                bucket_counts.append(buckets)
-        
-        return {
-            "partition_count": len(partitions),
-            "partition_types": list(partition_types),
-            "storage_mediums": list(storage_mediums),
-            "total_data_size_bytes": total_data_size,
-            "bucket_config": {
-                "unique_bucket_counts": list(set(bucket_counts)) if bucket_counts else [],
-                "avg_buckets": round(statistics.mean(bucket_counts), 1) if bucket_counts else 0
-            }
-        }
-    
-    def _assess_distribution_risk(self, distribution_metrics: Dict) -> str:
-        """Assess distribution risk level based on balance scores"""
-        tablet_balance = distribution_metrics.get("tablet_balance_score", 1.0)
-        data_balance = distribution_metrics.get("data_balance_score", 1.0)
-        overall_balance = distribution_metrics.get("overall_balance_score", 1.0)
-        
-        if overall_balance >= 0.9:
-            return "low"
-        elif overall_balance >= 0.7:
-            return "medium"
-        else:
-            return "high"
-    
-    # ==== Statistical Distribution Analysis Methods ====
-    
-    async def _analyze_statistical_distribution(
-        self, 
-        connection, 
-        table_name: str, 
-        columns_info: List[Dict], 
-        sampling_info: Dict
-    ) -> Dict[str, Any]:
-        """Analyze statistical data distribution patterns"""
-        try:
-            analysis_results = {}
-            
-            # Analyze numeric columns
-            numeric_columns = [col for col in columns_info if self._is_numeric_type(col["data_type"])]
-            if numeric_columns:
-                analysis_results["numeric_columns"] = await self._analyze_numeric_distributions(
-                    connection, table_name, numeric_columns, sampling_info
+                result = {
+                    "table_name": full_table_name,
+                    "analysis_timestamp": datetime.now().isoformat()
+                }
+                
+                # Analyze physical distribution
+                logger.info("🏗️ Analyzing physical distribution...")
+                physical_start = time.time()
+                result["physical_distribution"] = await self._analyze_physical_distribution(
+                    connection, table_name, catalog_name, db_name, detailed_response
                 )
-            
-            # Analyze categorical columns
-            categorical_columns = [col for col in columns_info if self._is_categorical_type(col["data_type"])]
-            if categorical_columns:
-                analysis_results["categorical_columns"] = await self._analyze_categorical_distributions(
-                    connection, table_name, categorical_columns, sampling_info
+                physical_time = time.time() - physical_start
+                logger.info(f"✅ Physical distribution analysis completed in {physical_time:.2f}s")
+                
+                # Analyze storage information
+                logger.info("💾 Analyzing storage information...")
+                storage_start = time.time()
+                result["storage_info"] = await self._analyze_storage_info(
+                    connection, full_table_name, detailed_response
                 )
-            
-            # Analyze temporal columns
-            temporal_columns = [col for col in columns_info if self._is_temporal_type(col["data_type"])]
-            if temporal_columns:
-                analysis_results["temporal_columns"] = await self._analyze_temporal_distributions(
-                    connection, table_name, temporal_columns, sampling_info
-                )
-            
-            # Generate quality insights
-            analysis_results["quality_insights"] = await self._generate_data_quality_insights(
-                connection, table_name, columns_info, sampling_info
-            )
-            
-            return analysis_results
-            
+                storage_time = time.time() - storage_start
+                logger.info(f"✅ Storage analysis completed in {storage_time:.2f}s")
+                
+                execution_time = time.time() - start_time
+                result["execution_time_seconds"] = round(execution_time, 3)
+                
+                logger.info(f"🎉 Storage analysis completed in {execution_time:.2f}s")
+                return result
+                
         except Exception as e:
-            logger.error(f"Statistical distribution analysis failed: {str(e)}")
-            return {"error": str(e)}
-
-    # ==== Storage Distribution Analysis Methods ====
-    
-    async def _analyze_storage_distribution(self, connection, table_name: str, sampling_info: Dict) -> Dict[str, Any]:
-        """Analyze storage-level data distribution"""
-        try:
-            # Get storage medium information
-            storage_info = await self._get_storage_information(connection, table_name)
-            
-            # Get data size distribution
-            size_distribution = await self._get_data_size_distribution(connection, table_name)
-            
-            return {
-                "storage_media": storage_info,
-                "size_distribution": size_distribution,
-                "compression_analysis": await self._analyze_compression_efficiency(connection, table_name)
-            }
-            
-        except Exception as e:
-            logger.error(f"Storage distribution analysis failed: {str(e)}")
+            logger.error(f"❌ Failed to analyze table storage: {str(e)}")
             return {"error": str(e)}
     
-    async def _get_storage_information(self, connection, table_name: str) -> Dict[str, Any]:
-        """Get storage medium and policy information"""
-        try:
-            # This would typically come from SHOW PARTITIONS or system tables
-            # For now, we'll return a placeholder structure
-            return {
-                "primary_medium": "HDD",
-                "cold_storage_policy": None,
-                "compression_type": "LZ4"
-            }
-        except Exception as e:
-            logger.warning(f"Failed to get storage info: {str(e)}")
-            return {}
-    
-    async def _get_data_size_distribution(self, connection, table_name: str) -> Dict[str, Any]:
-        """Get data size distribution across partitions/tablets"""
-        try:
-            # Placeholder for data size analysis
-            return {
-                "total_size_bytes": 0,
-                "avg_partition_size": 0,
-                "size_variance": 0
-            }
-        except Exception as e:
-            logger.warning(f"Failed to get size distribution: {str(e)}")
-            return {}
-    
-    async def _analyze_compression_efficiency(self, connection, table_name: str) -> Dict[str, Any]:
-        """Analyze data compression efficiency"""
-        try:
-            # Placeholder for compression analysis
-            return {
-                "compression_ratio": 1.0,
-                "efficiency_score": 0.8
-            }
-        except Exception as e:
-            logger.warning(f"Failed to analyze compression: {str(e)}")
-            return {}
-    
-    # ==== Distribution Insights Generation ====
-    
-    async def _generate_distribution_insights(
-        self, 
-        statistical_analysis: Dict, 
-        physical_analysis: Dict, 
-        storage_analysis: Dict
-    ) -> Dict[str, Any]:
-        """Generate comprehensive distribution insights"""
-        insights = {
-            "overall_distribution_health": "good",
-            "key_findings": [],
-            "recommendations": [],
-            "risk_assessment": "low"
-        }
-        
-        # Analyze physical distribution health
-        if "distribution_metrics" in physical_analysis:
-            balance_score = physical_analysis["distribution_metrics"].get("overall_balance_score", 1.0)
-            if balance_score < 0.7:
-                insights["overall_distribution_health"] = "poor"
-                insights["risk_assessment"] = "high"
-                insights["key_findings"].append("Significant data imbalance detected across cluster nodes")
-                insights["recommendations"].append({
-                    "type": "cluster_rebalancing",
-                    "priority": "high",
-                    "description": "Implement data rebalancing to improve cluster performance"
-                })
-            elif balance_score < 0.9:
-                insights["overall_distribution_health"] = "fair"
-                insights["risk_assessment"] = "medium"
-        
-        # Analyze statistical distribution patterns
-        if "quality_insights" in statistical_analysis:
-            quality_score = statistical_analysis["quality_insights"].get("overall_distribution_quality_score", 1.0)
-            if quality_score < 0.8:
-                insights["key_findings"].append("Data quality issues detected in statistical distribution")
-        
-        return insights
-
-    # ==== Utility Methods ====
+    # ===========================================
+    # Internal helper methods
+    # ===========================================
     
     def _build_full_table_name(self, table_name: str, catalog_name: Optional[str], db_name: Optional[str]) -> str:
         """Build full table name"""
         if catalog_name and db_name:
             return f"{catalog_name}.{db_name}.{table_name}"
         elif db_name:
-            return f"internal.{db_name}.{table_name}"
+            return f"{db_name}.{table_name}"
         else:
-            # Default to internal as catalog
-            return f"internal.information_schema.{table_name}" if "." not in table_name else table_name
+            return table_name
     
     async def _get_table_basic_info(self, connection, table_name: str) -> Optional[Dict]:
-        """Get table basic information"""
+        """Get basic table information"""
         try:
-            parts = table_name.split('.')
-            if len(parts) >= 3:
-                catalog, db, tbl = parts[0], parts[1], parts[2]
-                sql = f"SELECT COUNT(*) as row_count FROM {table_name}"
-            else:
-                sql = f"SELECT COUNT(*) as row_count FROM {table_name}"
-            
-            result = await connection.execute(sql)
+            # Try to get row count
+            count_sql = f"SELECT COUNT(*) as row_count FROM {table_name}"
+            result = await connection.execute(count_sql)
             if result.data:
                 return {"row_count": result.data[0]["row_count"]}
             return None
@@ -890,267 +323,526 @@ class DataQualityTools:
     async def _get_table_columns_info(self, connection, table_name: str, catalog_name: Optional[str], db_name: Optional[str]) -> List[Dict]:
         """Get table column information"""
         try:
-            # Use DESCRIBE statement to get column information
+            # Build DESCRIBE query
             describe_sql = f"DESCRIBE {self._build_full_table_name(table_name, catalog_name, db_name)}"
             result = await connection.execute(describe_sql)
             
-            columns = []
+            columns_info = []
             if result.data:
                 for row in result.data:
-                    columns.append({
+                    columns_info.append({
                         "column_name": row["Field"],
                         "data_type": row["Type"],
-                        "is_nullable": row["Null"] == "YES"
+                        "nullable": row["Null"] == "YES",
+                        "default_value": row["Default"],
+                        "column_comment": row.get("Comment", "")
                     })
             
-            return columns
+            return columns_info
         except Exception as e:
             logger.warning(f"Failed to get table columns info: {str(e)}")
             return []
-
+    
+    async def _get_table_partitions(self, connection, table_name: str, db_name: Optional[str] = None) -> List[Dict]:
+        """Get table partition information"""
+        try:
+            # Query partition information
+            partition_sql = f"""
+            SELECT 
+                PARTITION_NAME,
+                PARTITION_DESCRIPTION,
+                TABLE_ROWS,
+                DATA_LENGTH,
+                INDEX_LENGTH
+            FROM information_schema.PARTITIONS 
+            WHERE TABLE_SCHEMA = '{db_name or ""}' 
+            AND TABLE_NAME = '{table_name}'
+            AND PARTITION_NAME IS NOT NULL
+            """
+            
+            result = await connection.execute(partition_sql)
+            partitions = []
+            if result.data:
+                for row in result.data:
+                    partitions.append({
+                        "partition_name": row["PARTITION_NAME"],
+                        "partition_description": row["PARTITION_DESCRIPTION"],
+                        "table_rows": row["TABLE_ROWS"],
+                        "data_length": row["DATA_LENGTH"],
+                        "index_length": row["INDEX_LENGTH"]
+                    })
+            
+            return partitions
+        except Exception as e:
+            logger.warning(f"Failed to get table partitions: {str(e)}")
+            return []
+    
+    async def _get_table_size_info(self, connection, table_name: str) -> Dict[str, Any]:
+        """Get table size information"""
+        try:
+            # Query table size information
+            size_sql = f"""
+            SELECT 
+                table_name,
+                engine,
+                table_rows,
+                data_length,
+                index_length,
+                (data_length + index_length) as total_size
+            FROM information_schema.tables 
+            WHERE table_name = '{table_name.split('.')[-1]}'
+            """
+            
+            result = await connection.execute(size_sql)
+            if result.data and result.data[0]:
+                row = result.data[0]
+                return {
+                    "engine": row.get("engine", "Unknown"),
+                    "estimated_rows": row.get("table_rows", 0),
+                    "data_length": row.get("data_length", 0),
+                    "index_length": row.get("index_length", 0),
+                    "total_size": row.get("total_size", 0)
+                }
+            
+            return {"engine": "Unknown", "estimated_rows": 0, "data_length": 0, "index_length": 0, "total_size": 0}
+        except Exception as e:
+            logger.warning(f"Failed to get table size info: {str(e)}")
+            return {"engine": "Unknown", "estimated_rows": 0, "data_length": 0, "index_length": 0, "total_size": 0}
+    
     async def _determine_sampling_strategy(self, connection, table_name: str, total_rows: int, sample_size: int) -> Dict[str, Any]:
-        """Determine sampling strategy"""
-        if total_rows <= sample_size:
+        """Determine sampling strategy (compatibility version)"""
+        return await self._determine_optimized_sampling_strategy(connection, table_name, total_rows, sample_size)
+    
+    async def _determine_optimized_sampling_strategy(self, connection, table_name: str, total_rows: int, sample_size: int) -> Dict[str, Any]:
+        """Determine optimized sampling strategy"""
+        # Use thresholds from configuration
+        small_threshold = self.config.data_quality.small_table_threshold
+        medium_threshold = self.config.data_quality.medium_table_threshold
+        
+        if total_rows <= sample_size or sample_size <= 0:
             return {
-                "is_sampled": False,
                 "sample_size": total_rows,
                 "sample_rate": 1.0,
                 "sample_table_expression": table_name,
-                "sample_query_suffix": ""
+                "sampling_method": "full_table",
+                "total_rows": total_rows
+            }
+        
+        sample_rate = sample_size / total_rows
+        
+        # Stratified sampling strategy
+        if total_rows <= small_threshold:
+            # Small table: analyze full table directly
+            return {
+                "sample_size": total_rows,
+                "sample_rate": 1.0,
+                "sample_table_expression": table_name,
+                "sampling_method": "full_table_small",
+                "total_rows": total_rows
+            }
+        elif total_rows <= medium_threshold:
+            # Medium table: simple LIMIT sampling (avoid ORDER BY RAND())
+            sample_table_expr = f"(SELECT * FROM {table_name} LIMIT {sample_size}) AS sample_table"
+            return {
+                "sample_size": sample_size,
+                "sample_rate": sample_rate,
+                "sample_table_expression": sample_table_expr,
+                "sampling_method": "limit_sampling",
+                "total_rows": total_rows
             }
         else:
-            sample_rate = sample_size / total_rows
+            # Large table: use simpler sampling strategy
+            # For very large tables, still use LIMIT but increase sample size to improve representativeness
+            adjusted_sample_size = min(sample_size * 2, total_rows // 100)  # At most 1% sampling
+            sample_table_expr = f"(SELECT * FROM {table_name} LIMIT {adjusted_sample_size}) AS sample_table"
             return {
-                "is_sampled": True,
-                "sample_size": sample_size,
-                "sample_rate": round(sample_rate, 4),
-                "sample_table_expression": f"(SELECT * FROM {table_name} ORDER BY RAND() LIMIT {sample_size}) AS sampled_table",
-                "sample_query_suffix": f"ORDER BY RAND() LIMIT {sample_size}"
+                "sample_size": adjusted_sample_size,
+                "sample_rate": adjusted_sample_size / total_rows,
+                "sample_table_expression": sample_table_expr,
+                "sampling_method": "enhanced_limit_sampling",
+                "total_rows": total_rows,
+                "original_sample_size": sample_size
             }
     
-    def _select_analysis_columns(self, columns_info: List[Dict], include_all: bool) -> List[Dict]:
-        """Select columns to analyze"""
-        if include_all:
-            return columns_info
-        
-        # Select key column types
-        key_columns = []
-        for col in columns_info:
-            col_name = col["column_name"].lower()
-            data_type = col["data_type"].lower()
-            
-            # Skip system columns and binary columns
-            if col_name.startswith('__') or 'binary' in data_type or 'blob' in data_type:
-                continue
-            
-            key_columns.append(col)
-        
-        return key_columns[:20]  # Limit to max 20 columns
-    
-    def _is_numeric_type(self, data_type: str) -> bool:
-        """Determine if it's a numeric type"""
-        numeric_types = ['int', 'bigint', 'smallint', 'tinyint', 'decimal', 'float', 'double']
-        return any(t in data_type.lower() for t in numeric_types)
-    
-    def _is_categorical_type(self, data_type: str) -> bool:
-        """Determine if it's a categorical type"""
-        return not self._is_numeric_type(data_type) and not self._is_temporal_type(data_type)
-    
-    def _is_temporal_type(self, data_type: str) -> bool:
-        """Determine if it's a temporal type"""
-        temporal_types = ['date', 'datetime', 'timestamp', 'time']
-        return any(t in data_type.lower() for t in temporal_types)
-    
-    async def _analyze_column_completeness_unified(self, connection, table_name: str, columns_info: List[Dict], sampling_info: Dict) -> Dict[str, Any]:
-        """Unified column completeness analysis"""
-        column_completeness = {}
+    async def _analyze_columns_batch(self, connection, table_name: str, columns_info: List[Dict], 
+                                   sampling_info: Dict, analysis_types: List[str], detailed_response: bool) -> Dict[str, Any]:
+        """Batch analyze multiple columns (optimized version)"""
+        result = {}
         table_expr = sampling_info.get("sample_table_expression", table_name)
         
-        for column in columns_info:
-            column_name = column["column_name"]
+        try:
+            # Build batch SQL queries
+            if "completeness" in analysis_types or "both" in analysis_types:
+                logger.info("🧩 Executing batch completeness analysis...")
+                completeness_start = time.time()
+                result["completeness_analysis"] = await self._analyze_completeness_batch(
+                    connection, table_expr, columns_info
+                )
+                completeness_time = time.time() - completeness_start
+                logger.info(f"✅ Batch completeness analysis completed in {completeness_time:.2f}s")
+            
+            if "distribution" in analysis_types or "both" in analysis_types:
+                logger.info("📈 Executing batch distribution analysis...")
+                distribution_start = time.time()
+                result["distribution_analysis"] = await self._analyze_distribution_batch(
+                    connection, table_expr, columns_info, detailed_response
+                )
+                distribution_time = time.time() - distribution_start
+                logger.info(f"✅ Batch distribution analysis completed in {distribution_time:.2f}s")
+            
+            return result
+            
+        except Exception as e:
+            logger.error(f"❌ Batch analysis failed: {str(e)}")
+            # Fallback to sequential analysis
+            logger.info("🔄 Falling back to sequential analysis...")
+            return await self._analyze_columns_sequential(connection, table_name, columns_info, sampling_info, analysis_types, detailed_response)
+    
+    async def _analyze_columns_sequential(self, connection, table_name: str, columns_info: List[Dict], 
+                                        sampling_info: Dict, analysis_types: List[str], detailed_response: bool) -> Dict[str, Any]:
+        """Sequential column analysis (fallback solution)"""
+        result = {}
+        
+        if "completeness" in analysis_types or "both" in analysis_types:
+            result["completeness_analysis"] = await self._analyze_completeness(
+                connection, table_name, columns_info, sampling_info
+            )
+        
+        if "distribution" in analysis_types or "both" in analysis_types:
+            result["distribution_analysis"] = await self._analyze_distribution(
+                connection, table_name, columns_info, sampling_info, detailed_response
+            )
+        
+        return result
+    
+    async def _analyze_completeness_batch(self, connection, table_expr: str, columns_info: List[Dict]) -> Dict[str, Any]:
+        """Batch completeness analysis"""
+        try:
+            # Build batch completeness query
+            select_clauses = []
+            
+            # First get total row count
+            select_clauses.append("COUNT(*) as total_rows")
+            
+            # Then add statistics for each column
+            for col in columns_info:
+                col_name = col["column_name"]
+                select_clauses.extend([
+                    f"COUNT({col_name}) as {col_name}_non_null",
+                    f"COUNT(DISTINCT {col_name}) as {col_name}_distinct"
+                ])
+            
+            batch_sql = f"SELECT {', '.join(select_clauses)} FROM {table_expr}"
+            
+            result = await connection.execute(batch_sql)
+            if not result.data:
+                return {"error": "No data returned from batch completeness query"}
+            
+            row = result.data[0]
+            completeness_results = {}
+            total_rows = row["total_rows"]
+            
+            for col in columns_info:
+                col_name = col["column_name"]
+                non_null = row[f"{col_name}_non_null"]
+                distinct = row[f"{col_name}_distinct"]
+                
+                null_count = total_rows - non_null
+                null_rate = null_count / total_rows if total_rows > 0 else 0
+                completeness_score = 1 - null_rate
+                
+                completeness_results[col_name] = {
+                    "total_rows": total_rows,
+                    "non_null_count": non_null,
+                    "null_count": null_count,
+                    "null_rate": round(null_rate, 4),
+                    "completeness_score": round(completeness_score, 4),
+                    "distinct_count": distinct,
+                    "uniqueness_ratio": round(distinct / non_null, 4) if non_null > 0 else 0
+                }
+            
+            return completeness_results
+            
+        except Exception as e:
+            logger.error(f"❌ Batch completeness analysis failed: {str(e)}")
+            raise
+    
+    async def _analyze_distribution_batch(self, connection, table_expr: str, columns_info: List[Dict], detailed_response: bool) -> Dict[str, Any]:
+        """Batch distribution analysis"""
+        try:
+            # Classify columns
+            numeric_columns = [col for col in columns_info if self._is_numeric_type(col["data_type"])]
+            categorical_columns = [col for col in columns_info if self._is_categorical_type(col["data_type"])]
+            temporal_columns = [col for col in columns_info if self._is_temporal_type(col["data_type"])]
+            
+            distribution_results = {}
+            
+            # Batch numeric analysis
+            if numeric_columns:
+                logger.info(f"📊 Batch analyzing {len(numeric_columns)} numeric columns...")
+                numeric_results = await self._analyze_numeric_distributions_batch(connection, table_expr, numeric_columns)
+                distribution_results.update(numeric_results)
+            
+            # Batch categorical analysis
+            if categorical_columns:
+                logger.info(f"📊 Batch analyzing {len(categorical_columns)} categorical columns...")
+                categorical_results = await self._analyze_categorical_distributions_batch(connection, table_expr, categorical_columns)
+                distribution_results.update(categorical_results)
+            
+            # Batch temporal analysis
+            if temporal_columns:
+                logger.info(f"📊 Batch analyzing {len(temporal_columns)} temporal columns...")
+                temporal_results = await self._analyze_temporal_distributions_batch(connection, table_expr, temporal_columns)
+                distribution_results.update(temporal_results)
+            
+            return distribution_results
+            
+        except Exception as e:
+            logger.error(f"❌ Batch distribution analysis failed: {str(e)}")
+            raise
+    
+    async def _analyze_numeric_distributions_batch(self, connection, table_expr: str, numeric_columns: List[Dict]) -> Dict[str, Any]:
+        """Batch numeric distribution analysis"""
+        try:
+            select_clauses = []
+            for col in numeric_columns:
+                col_name = col["column_name"]
+                select_clauses.extend([
+                    f"MIN({col_name}) as {col_name}_min",
+                    f"MAX({col_name}) as {col_name}_max",
+                    f"AVG({col_name}) as {col_name}_avg",
+                    f"STDDEV({col_name}) as {col_name}_stddev"
+                ])
+            
+            batch_sql = f"SELECT {', '.join(select_clauses)} FROM {table_expr}"
+            
+            result = await connection.execute(batch_sql)
+            if not result.data:
+                return {}
+            
+            row = result.data[0]
+            numeric_results = {}
+            
+            for col in numeric_columns:
+                col_name = col["column_name"]
+                numeric_results[col_name] = {
+                    "data_type": "numeric",
+                    "min_value": row[f"{col_name}_min"],
+                    "max_value": row[f"{col_name}_max"],
+                    "mean": round(float(row[f"{col_name}_avg"]), 4) if row[f"{col_name}_avg"] is not None else None,
+                    "std_dev": round(float(row[f"{col_name}_stddev"]), 4) if row[f"{col_name}_stddev"] is not None else None
+                }
+            
+            return numeric_results
+            
+        except Exception as e:
+            logger.error(f"❌ Batch numeric analysis failed: {str(e)}")
+            return {}
+    
+    async def _analyze_categorical_distributions_batch(self, connection, table_expr: str, categorical_columns: List[Dict]) -> Dict[str, Any]:
+        """Batch categorical distribution analysis"""
+        categorical_results = {}
+        
+        # For categorical data, need to analyze column by column to get frequency distribution
+        for col in categorical_columns:
+            col_name = col["column_name"]
             try:
-                null_sql = f"""
+                # Get top 10 most frequent values
+                freq_sql = f"""
+                SELECT {col_name}, COUNT(*) as frequency
+                FROM {table_expr}
+                WHERE {col_name} IS NOT NULL
+                GROUP BY {col_name}
+                ORDER BY frequency DESC
+                LIMIT 10
+                """
+                
+                result = await connection.execute(freq_sql)
+                frequencies = result.data if result.data else []
+                
+                categorical_results[col_name] = {
+                    "data_type": "categorical",
+                    "top_values": frequencies
+                }
+                
+            except Exception as e:
+                logger.warning(f"Failed to analyze categorical column {col_name}: {str(e)}")
+                categorical_results[col_name] = {
+                    "data_type": "categorical",
+                    "error": str(e)
+                }
+        
+        return categorical_results
+    
+    async def _analyze_temporal_distributions_batch(self, connection, table_expr: str, temporal_columns: List[Dict]) -> Dict[str, Any]:
+        """Batch temporal distribution analysis"""
+        try:
+            select_clauses = []
+            for col in temporal_columns:
+                col_name = col["column_name"]
+                select_clauses.extend([
+                    f"MIN({col_name}) as {col_name}_min",
+                    f"MAX({col_name}) as {col_name}_max"
+                ])
+            
+            if not select_clauses:
+                return {}
+            
+            batch_sql = f"SELECT {', '.join(select_clauses)} FROM {table_expr}"
+            
+            result = await connection.execute(batch_sql)
+            if not result.data:
+                return {}
+            
+            row = result.data[0]
+            temporal_results = {}
+            
+            for col in temporal_columns:
+                col_name = col["column_name"]
+                temporal_results[col_name] = {
+                    "data_type": "temporal",
+                    "min_value": row[f"{col_name}_min"],
+                    "max_value": row[f"{col_name}_max"]
+                }
+            
+            return temporal_results
+            
+        except Exception as e:
+            logger.error(f"❌ Batch temporal analysis failed: {str(e)}")
+            return {}
+    
+    async def _analyze_completeness(self, connection, table_name: str, columns_info: List[Dict], sampling_info: Dict) -> Dict[str, Any]:
+        """Analyze column completeness"""
+        logger.info(f"🔍 Analyzing completeness for {len(columns_info)} columns")
+        
+        completeness_results = {}
+        table_expr = sampling_info.get("sample_table_expression", table_name)
+        
+        for i, column in enumerate(columns_info, 1):
+            col_name = column["column_name"]
+            logger.info(f"  📊 [{i}/{len(columns_info)}] Analyzing column: {col_name}")
+            
+            try:
+                # Completeness analysis SQL
+                completeness_sql = f"""
                 SELECT 
                     COUNT(*) as total_count,
-                    COUNT({column_name}) as non_null_count,
-                    COUNT(*) - COUNT({column_name}) as null_count
+                    COUNT({col_name}) as non_null_count,
+                    COUNT(*) - COUNT({col_name}) as null_count
                 FROM {table_expr}
                 """
                 
-                result = await connection.execute(null_sql)
+                result = await connection.execute(completeness_sql)
                 if result.data:
                     stats = result.data[0]
                     total_count = stats["total_count"]
+                    non_null_count = stats["non_null_count"]
                     null_count = stats["null_count"]
-                    null_rate = null_count / total_count if total_count > 0 else 0
-                    completeness_score = 1.0 - null_rate
                     
-                    column_completeness[column_name] = {
+                    null_rate = null_count / total_count if total_count > 0 else 0
+                    completeness_score = 1 - null_rate
+                    
+                    completeness_results[col_name] = {
                         "data_type": column["data_type"],
-                        "is_nullable": column["is_nullable"],
                         "total_count": total_count,
+                        "non_null_count": non_null_count,
                         "null_count": null_count,
-                        "non_null_count": stats["non_null_count"],
                         "null_rate": round(null_rate, 4),
                         "completeness_score": round(completeness_score, 4)
                     }
                     
+                    if null_rate > 0.1:  # More than 10% null rate
+                        logger.info(f"    ⚠️  High null rate: {null_rate:.1%}")
+                    else:
+                        logger.info(f"    ✅ Good completeness: {completeness_score:.1%}")
+                        
             except Exception as e:
-                logger.warning(f"Failed to analyze completeness for column {column_name}: {str(e)}")
-                column_completeness[column_name] = {
-                    "error": str(e),
-                    "completeness_score": 0.0
-                }
+                logger.warning(f"Failed to analyze completeness for column {col_name}: {str(e)}")
+                completeness_results[col_name] = {"error": str(e)}
         
-        return column_completeness
+        # Calculate overall completeness score
+        valid_scores = [
+            result["completeness_score"] 
+            for result in completeness_results.values() 
+            if isinstance(result, dict) and "completeness_score" in result
+        ]
+        overall_score = sum(valid_scores) / len(valid_scores) if valid_scores else 0
+        
+        return {
+            "column_completeness": completeness_results,
+            "overall_completeness_score": round(overall_score, 4),
+            "summary": {
+                "total_columns": len(columns_info),
+                "analyzed_columns": len([r for r in completeness_results.values() if "error" not in r]),
+                "perfect_completeness_columns": len([r for r in completeness_results.values() 
+                                                   if isinstance(r, dict) and r.get("completeness_score") == 1.0])
+            }
+        }
     
-    async def _check_business_rule_compliance(self, connection, table_name: str, business_rules: List[Dict], sampling_info: Dict) -> Dict[str, Any]:
-        """Check business rule compliance"""
-        compliance_results = {}
+    async def _analyze_distribution(self, connection, table_name: str, columns_info: List[Dict], sampling_info: Dict, detailed_response: bool) -> Dict[str, Any]:
+        """Analyze column distribution"""
+        logger.info(f"🔍 Analyzing distribution for {len(columns_info)} columns")
+        
         table_expr = sampling_info.get("sample_table_expression", table_name)
         
-        for rule in business_rules:
-            rule_name = rule.get("rule_name", "unknown")
-            sql_condition = rule.get("sql_condition", "")
-            
-            if not sql_condition:
-                continue
-                
-            try:
-                compliance_sql = f"""
-                SELECT 
-                    COUNT(*) as total_count,
-                    SUM(CASE WHEN {sql_condition} THEN 1 ELSE 0 END) as pass_count
-                FROM {table_expr}
-                """
-                
-                result = await connection.execute(compliance_sql)
-                if result.data:
-                    stats = result.data[0]
-                    total_count = stats["total_count"]
-                    pass_count = stats["pass_count"] or 0
-                    fail_count = total_count - pass_count
-                    pass_rate = pass_count / total_count if total_count > 0 else 0
-                    
-                    compliance_results[rule_name] = {
-                        "rule_condition": sql_condition,
-                        "total_records": total_count,
-                        "pass_count": pass_count,
-                        "fail_count": fail_count,
-                        "pass_rate": round(pass_rate, 4),
-                        "compliance_score": round(pass_rate, 4)
-                    }
-                    
-            except Exception as e:
-                logger.warning(f"Failed to check business rule {rule_name}: {str(e)}")
-                compliance_results[rule_name] = {
-                    "error": str(e),
-                    "compliance_score": 0.0
-                }
+        # Classify columns by type
+        numeric_columns = [col for col in columns_info if self._is_numeric_type(col["data_type"])]
+        categorical_columns = [col for col in columns_info if self._is_categorical_type(col["data_type"])]
+        temporal_columns = [col for col in columns_info if self._is_temporal_type(col["data_type"])]
         
-        return compliance_results
+        logger.info(f"  📊 Column types - Numeric: {len(numeric_columns)}, Categorical: {len(categorical_columns)}, Temporal: {len(temporal_columns)}")
+        
+        distribution_results = {}
+        
+        # Analyze numeric columns
+        if numeric_columns:
+            logger.info("  📈 Analyzing numeric distributions...")
+            distribution_results["numeric"] = await self._analyze_numeric_distributions(
+                connection, table_expr, numeric_columns, detailed_response
+            )
+        
+        # Analyze categorical columns
+        if categorical_columns:
+            logger.info("  📊 Analyzing categorical distributions...")
+            distribution_results["categorical"] = await self._analyze_categorical_distributions(
+                connection, table_expr, categorical_columns, detailed_response
+            )
+        
+        # Analyze temporal columns
+        if temporal_columns:
+            logger.info("  📅 Analyzing temporal distributions...")
+            distribution_results["temporal"] = await self._analyze_temporal_distributions(
+                connection, table_expr, temporal_columns, detailed_response
+            )
+        
+        return {
+            "distribution_by_type": distribution_results,
+            "summary": {
+                "total_columns": len(columns_info),
+                "numeric_columns": len(numeric_columns),
+                "categorical_columns": len(categorical_columns),
+                "temporal_columns": len(temporal_columns)
+            }
+        }
     
-    async def _detect_data_integrity_issues(self, connection, table_name: str, columns_info: List[Dict], sampling_info: Dict) -> List[Dict]:
-        """Detect data integrity issues"""
-        issues = []
-        table_expr = sampling_info.get("sample_table_expression", table_name)
-        
-        try:
-            # Detect duplicate values in primary key fields
-            primary_key_columns = [col["column_name"] for col in columns_info if "primary" in col.get("column_comment", "").lower()]
-            
-            for pk_col in primary_key_columns:
-                duplicate_sql = f"""
-                SELECT COUNT(*) as duplicate_count
-                FROM (
-                    SELECT {pk_col}, COUNT(*) as cnt
-                    FROM {table_expr}
-                    WHERE {pk_col} IS NOT NULL
-                    GROUP BY {pk_col}
-                    HAVING COUNT(*) > 1
-                ) t
-                """
-                
-                result = await connection.execute(duplicate_sql)
-                if result.data and result.data[0]["duplicate_count"] > 0:
-                    issues.append({
-                        "type": "duplicate_primary_keys",
-                        "column": pk_col,
-                        "count": result.data[0]["duplicate_count"],
-                        "severity": "high",
-                        "description": f"Found duplicate values in primary key column {pk_col}"
-                    })
-                    
-        except Exception as e:
-            logger.warning(f"Failed to detect integrity issues: {str(e)}")
-            issues.append({
-                "type": "detection_error",
-                "error": str(e),
-                "severity": "unknown"
-            })
-        
-        return issues
+    def _is_numeric_type(self, data_type: str) -> bool:
+        """Check if data type is numeric"""
+        numeric_types = ["int", "bigint", "smallint", "tinyint", "float", "double", "decimal", "numeric"]
+        return any(nt in data_type.lower() for nt in numeric_types)
     
-    def _calculate_completeness_score(self, column_completeness: Dict, business_rule_compliance: Dict) -> float:
-        """Calculate overall completeness score"""
-        if not column_completeness:
-            return 0.0
-            
-        # Calculate column completeness average score
-        column_scores = [
-            col_info.get("completeness_score", 0.0) 
-            for col_info in column_completeness.values() 
-            if isinstance(col_info, dict) and "completeness_score" in col_info
-        ]
-        avg_column_score = sum(column_scores) / len(column_scores) if column_scores else 0.0
-        
-        # Calculate business rule compliance average score
-        compliance_scores = [
-            rule_info.get("compliance_score", 0.0) 
-            for rule_info in business_rule_compliance.values() 
-            if isinstance(rule_info, dict) and "compliance_score" in rule_info
-        ]
-        avg_compliance_score = sum(compliance_scores) / len(compliance_scores) if compliance_scores else 1.0
-        
-        # Comprehensive score (column completeness weight 70%, business rules weight 30%)
-        overall_score = avg_column_score * 0.7 + avg_compliance_score * 0.3
-        return round(overall_score, 4)
+    def _is_categorical_type(self, data_type: str) -> bool:
+        """Check if data type is categorical"""
+        categorical_types = ["varchar", "char", "string", "text", "enum"]
+        return any(ct in data_type.lower() for ct in categorical_types)
     
-    def _generate_completeness_recommendations(self, column_completeness: Dict, integrity_issues: List[Dict]) -> List[Dict]:
-        """Generate completeness improvement recommendations"""
-        recommendations = []
-        
-        # Generate recommendations based on column completeness
-        for col_name, col_info in column_completeness.items():
-            if isinstance(col_info, dict):
-                null_rate = col_info.get("null_rate", 0)
-                if null_rate > 0.1:  # Null rate exceeds 10%
-                    recommendations.append({
-                        "type": "high_null_rate",
-                        "column": col_name,
-                        "priority": "high" if null_rate > 0.5 else "medium",
-                        "description": f"Column {col_name} has high null rate ({null_rate:.1%})",
-                        "suggested_action": "Review data collection process or add data validation"
-                    })
-        
-        # Generate recommendations based on integrity issues
-        for issue in integrity_issues:
-            if issue["type"] == "duplicate_primary_keys":
-                recommendations.append({
-                    "type": "data_deduplication",
-                    "column": issue["column"],
-                    "priority": "high",
-                    "description": f"Duplicate primary key values found in {issue['column']}",
-                    "suggested_action": "Implement unique constraint or data deduplication process"
-                })
-        
-        return recommendations
+    def _is_temporal_type(self, data_type: str) -> bool:
+        """Check if data type is temporal"""
+        temporal_types = ["date", "datetime", "timestamp", "time"]
+        return any(tt in data_type.lower() for tt in temporal_types)
     
-    # For simplicity, only add key distribution analysis methods here
-    async def _analyze_numeric_distributions(self, connection, table_name: str, numeric_columns: List[Dict], sampling_info: Dict) -> Dict[str, Any]:
+    async def _analyze_numeric_distributions(self, connection, table_expr: str, numeric_columns: List[Dict], detailed_response: bool) -> Dict[str, Any]:
         """Analyze numeric column distributions"""
         numeric_analysis = {}
-        table_expr = sampling_info.get("sample_table_expression", table_name)
         
         for column in numeric_columns:
             col_name = column["column_name"]
@@ -1178,20 +870,25 @@ class DataQualityTools:
                         "std_dev": round(float(stats["std_dev"]), 4) if stats["std_dev"] else None
                     }
                     
+                    # If detailed response is needed, add more statistical information
+                    if detailed_response:
+                        # Can add percentiles, skewness, kurtosis, etc.
+                        pass
+                        
             except Exception as e:
                 logger.warning(f"Failed to analyze numeric column {col_name}: {str(e)}")
                 numeric_analysis[col_name] = {"error": str(e)}
         
         return numeric_analysis
     
-    async def _analyze_categorical_distributions(self, connection, table_name: str, categorical_columns: List[Dict], sampling_info: Dict) -> Dict[str, Any]:
+    async def _analyze_categorical_distributions(self, connection, table_expr: str, categorical_columns: List[Dict], detailed_response: bool) -> Dict[str, Any]:
         """Analyze categorical column distributions"""
         categorical_analysis = {}
-        table_expr = sampling_info.get("sample_table_expression", table_name)
         
         for column in categorical_columns:
             col_name = column["column_name"]
             try:
+                # Basic statistics
                 cardinality_sql = f"""
                 SELECT 
                     COUNT(DISTINCT {col_name}) as cardinality,
@@ -1203,140 +900,161 @@ class DataQualityTools:
                 cardinality_result = await connection.execute(cardinality_sql)
                 
                 if cardinality_result.data:
-                    cardinality_data = cardinality_result.data[0]
-                    cardinality = cardinality_data["cardinality"]
-                    non_null_count = cardinality_data["non_null_count"]
+                    stats = cardinality_result.data[0]
+                    cardinality = stats["cardinality"]
+                    non_null_count = stats["non_null_count"]
                     
                     categorical_analysis[col_name] = {
                         "data_type": column["data_type"],
                         "cardinality": cardinality,
-                        "non_null_count": non_null_count,
-                        "diversity_score": round(cardinality / non_null_count, 4) if non_null_count > 0 else 0
+                        "non_null_count": non_null_count
                     }
                     
+                    # If cardinality is not too large, get distribution of top values
+                    if cardinality <= 50 and detailed_response:
+                        top_values_sql = f"""
+                        SELECT {col_name}, COUNT(*) as count
+                        FROM {table_expr}
+                        WHERE {col_name} IS NOT NULL
+                        GROUP BY {col_name}
+                        ORDER BY COUNT(*) DESC
+                        LIMIT 10
+                        """
+                        
+                        top_values_result = await connection.execute(top_values_sql)
+                        if top_values_result.data:
+                            categorical_analysis[col_name]["top_values"] = [
+                                {"value": row[col_name], "count": row["count"]}
+                                for row in top_values_result.data
+                            ]
+                        
             except Exception as e:
                 logger.warning(f"Failed to analyze categorical column {col_name}: {str(e)}")
                 categorical_analysis[col_name] = {"error": str(e)}
         
         return categorical_analysis
     
-    async def _analyze_temporal_distributions(self, connection, table_name: str, temporal_columns: List[Dict], sampling_info: Dict) -> Dict[str, Any]:
+    async def _analyze_temporal_distributions(self, connection, table_expr: str, temporal_columns: List[Dict], detailed_response: bool) -> Dict[str, Any]:
         """Analyze temporal column distributions"""
         temporal_analysis = {}
-        table_expr = sampling_info.get("sample_table_expression", table_name)
         
         for column in temporal_columns:
             col_name = column["column_name"]
             try:
-                range_sql = f"""
+                stats_sql = f"""
                 SELECT 
                     COUNT({col_name}) as non_null_count,
-                    MIN({col_name}) as earliest,
-                    MAX({col_name}) as latest
+                    MIN({col_name}) as min_date,
+                    MAX({col_name}) as max_date
                 FROM {table_expr}
                 WHERE {col_name} IS NOT NULL
                 """
                 
-                range_result = await connection.execute(range_sql)
-                
-                if range_result.data and range_result.data[0]["non_null_count"] > 0:
-                    range_data = range_result.data[0]
-                    earliest = range_data["earliest"]
-                    latest = range_data["latest"]
-                    
+                result = await connection.execute(stats_sql)
+                if result.data and result.data[0]["non_null_count"] > 0:
+                    stats = result.data[0]
                     temporal_analysis[col_name] = {
                         "data_type": column["data_type"],
-                        "non_null_count": range_data["non_null_count"],
-                        "date_range": {
-                            "earliest": str(earliest),
-                            "latest": str(latest)
-                        }
+                        "non_null_count": stats["non_null_count"],
+                        "min_date": str(stats["min_date"]) if stats["min_date"] else None,
+                        "max_date": str(stats["max_date"]) if stats["max_date"] else None
                     }
                     
+                    # Calculate time span
+                    if stats["min_date"] and stats["max_date"]:
+                        try:
+                            min_date = stats["min_date"]
+                            max_date = stats["max_date"]
+                            if hasattr(min_date, 'date') and hasattr(max_date, 'date'):
+                                time_span = (max_date - min_date).days
+                                temporal_analysis[col_name]["time_span_days"] = time_span
+                        except:
+                            pass
+                        
             except Exception as e:
                 logger.warning(f"Failed to analyze temporal column {col_name}: {str(e)}")
                 temporal_analysis[col_name] = {"error": str(e)}
         
         return temporal_analysis
     
-    async def _generate_data_quality_insights(self, connection, table_name: str, columns_info: List[Dict], sampling_info: Dict) -> Dict[str, Any]:
-        """Generate data quality insights"""
+    async def _analyze_physical_distribution(self, connection, table_name: str, catalog_name: Optional[str], db_name: Optional[str], detailed_response: bool) -> Dict[str, Any]:
+        """Analyze physical distribution"""
         try:
-            total_columns = len(columns_info)
+            # Get partition information
+            partitions = await self._get_table_partitions(connection, table_name, db_name)
             
-            # Calculate null rate statistics
-            null_analysis = await self._analyze_overall_null_rates(connection, table_name, columns_info, sampling_info)
-            
-            # Identify potential data quality issues
-            quality_issues = []
-            
-            # High null rate columns
-            high_null_columns = [col for col, rate in null_analysis["column_null_rates"].items() if rate > 0.2]
-            if high_null_columns:
-                quality_issues.append({
-                    "issue_type": "high_null_rates",
-                    "severity": "medium",
-                    "affected_columns": high_null_columns,
-                    "description": f"{len(high_null_columns)} columns have null rates > 20%"
-                })
-            
-            # Calculate overall distribution quality score
-            avg_null_rate = sum(null_analysis["column_null_rates"].values()) / len(null_analysis["column_null_rates"]) if null_analysis["column_null_rates"] else 0
-            overall_distribution_quality_score = max(0, 1 - avg_null_rate)
-            
-            return {
-                "total_columns_analyzed": total_columns,
-                "null_analysis": null_analysis,
-                "overall_distribution_quality_score": round(overall_distribution_quality_score, 3),
-                "quality_issues": quality_issues
+            # Analyze partition distribution
+            partition_analysis = {
+                "partition_count": len(partitions),
+                "total_rows": sum(p.get("table_rows", 0) for p in partitions),
+                "total_data_size": sum(p.get("data_length", 0) for p in partitions),
+                "partitions": partitions if detailed_response else partitions[:5]  # Limit return count
             }
             
+            # Calculate partition balance
+            if len(partitions) > 1:
+                row_counts = [p.get("table_rows", 0) for p in partitions if p.get("table_rows", 0) > 0]
+                if len(row_counts) > 1:  # Need at least 2 non-zero values to calculate standard deviation
+                    try:
+                        avg_rows = sum(row_counts) / len(row_counts)
+                        if avg_rows > 0:
+                            std_dev = statistics.stdev(row_counts)
+                            balance_score = 1 - (std_dev / avg_rows)
+                            partition_analysis["balance_score"] = round(max(0, min(1, balance_score)), 4)
+                        else:
+                            partition_analysis["balance_score"] = 0.0
+                    except statistics.StatisticsError:
+                        partition_analysis["balance_score"] = 0.0
+                else:
+                    partition_analysis["balance_score"] = 1.0 if len(row_counts) == 1 else 0.0
+            
+            return partition_analysis
+            
         except Exception as e:
-            logger.warning(f"Failed to generate data quality insights: {str(e)}")
-            return {"overall_distribution_quality_score": 0.0, "error": str(e)}
+            logger.warning(f"Failed to analyze physical distribution: {str(e)}")
+            return {"error": str(e)}
     
-    async def _analyze_overall_null_rates(self, connection, table_name: str, columns: List[Dict], sampling_info: Dict) -> Dict[str, Any]:
-        """Analyze overall null rates"""
-        column_null_rates = {}
-        table_expr = sampling_info.get("sample_table_expression", table_name)
-        
-        for column in columns:
-            col_name = column["column_name"]
-            try:
-                null_sql = f"""
-                SELECT 
-                    COUNT(*) as total_count,
-                    COUNT({col_name}) as non_null_count
-                FROM {table_expr}
-                """
-                
-                result = await connection.execute(null_sql)
-                if result.data:
-                    data = result.data[0]
-                    total_count = data["total_count"]
-                    non_null_count = data["non_null_count"]
-                    null_count = total_count - non_null_count
-                    null_rate = null_count / total_count if total_count > 0 else 0
-                    
-                    column_null_rates[col_name] = round(null_rate, 4)
-                    
-            except Exception as e:
-                logger.warning(f"Failed to analyze null rate for {col_name}: {str(e)}")
-                column_null_rates[col_name] = 1.0  # Assume worst case
-        
-        return {"column_null_rates": column_null_rates}
-    
-    def _assess_quality_grade(self, score: float) -> str:
-        """Assess quality grade"""
-        if score >= 0.95:
-            return "A+"
-        elif score >= 0.90:
-            return "A"
-        elif score >= 0.80:
-            return "B"
-        elif score >= 0.70:
-            return "C"
-        elif score >= 0.60:
-            return "D"
-        else:
-            return "F" 
\ No newline at end of file
+    async def _analyze_storage_info(self, connection, table_name: str, detailed_response: bool) -> Dict[str, Any]:
+        """Analyze storage information"""
+        try:
+            # Get storage information
+            storage_info = await self._get_table_size_info(connection, table_name)
+            
+            # Calculate compression ratio and other information
+            data_length = storage_info.get("data_length", 0)
+            estimated_rows = storage_info.get("estimated_rows", 0)
+            
+            if data_length is not None and estimated_rows is not None and data_length > 0 and estimated_rows > 0:
+                avg_row_size = data_length / estimated_rows
+                storage_info["avg_row_size_bytes"] = round(avg_row_size, 2)
+            
+            # Storage efficiency analysis
+            total_size = storage_info.get("total_size", 0)
+            data_size = storage_info.get("data_length", 0)
+            index_size = storage_info.get("index_length", 0)
+            
+            # Handle None values
+            if total_size is None:
+                total_size = 0
+            if data_size is None:
+                data_size = 0
+            if index_size is None:
+                index_size = 0
+            
+            if total_size > 0:
+                storage_info["data_ratio"] = round(data_size / total_size, 4)
+                storage_info["index_ratio"] = round(index_size / total_size, 4)
+            else:
+                # If no total_size, try to calculate from data_length and index_length
+                if data_size > 0 or index_size > 0:
+                    calculated_total = data_size + index_size
+                    if calculated_total > 0:
+                        storage_info["data_ratio"] = round(data_size / calculated_total, 4)
+                        storage_info["index_ratio"] = round(index_size / calculated_total, 4)
+                        storage_info["calculated_total_size"] = calculated_total
+            
+            return storage_info
+            
+        except Exception as e:
+            logger.warning(f"Failed to analyze storage info: {str(e)}")
+            return {"error": str(e)} 
\ No newline at end of file
diff --git a/doris_mcp_server/utils/db.py b/doris_mcp_server/utils/db.py
index 20d3d49..c71928a 100644
--- a/doris_mcp_server/utils/db.py
+++ b/doris_mcp_server/utils/db.py
@@ -175,7 +175,7 @@ class DorisConnection:
                     self.logger.debug(f"Connection {self.session_id} ping failed: {query_error}")
                 self.is_healthy = False
                 return False
-                
+            
         except Exception as e:
             # Catch any other unexpected errors
             self.logger.debug(f"Connection {self.session_id} ping failed with unexpected error: {e}")
@@ -192,33 +192,35 @@ class DorisConnection:
 
 
 class DorisConnectionManager:
-    """Doris database connection manager - Simplified Strategy
+    """Doris database connection manager - Enhanced Strategy
 
-    Uses direct connection pool management without session-level caching
+    Uses direct connection pool management with proper synchronization
     Implements connection pool health monitoring and proactive cleanup
     """
 
     def __init__(self, config, security_manager=None):
         self.config = config
-        self.security_manager = security_manager
         self.pool: Pool | None = None
         self.logger = get_logger(__name__)
-        self.metrics = ConnectionMetrics()
-        
-        # Remove session-level connection management
-        # self.session_connections = {}  # REMOVED
-        
-        # Pool health monitoring
-        self.health_check_interval = 30  # seconds
-        self.pool_warmup_size = 3  # connections to maintain
+        self.security_manager = security_manager
+
+        # Connection pool state management
+        self.pool_recovering = False
         self.pool_health_check_task = None
         self.pool_cleanup_task = None
         
-        # Pool recovery lock to prevent race conditions
-        self.pool_recovery_lock = asyncio.Lock()
-        self.pool_recovering = False
+        # Metrics tracking
+        self.metrics = ConnectionMetrics()
+        
+        # 🔧 FIX: Add connection acquisition lock to prevent race conditions
+        self._connection_lock = asyncio.Lock()
+        self._recovery_lock = asyncio.Lock()
+        
+        # 🔧 FIX: Add connection acquisition queue to serialize requests
+        self._connection_semaphore = asyncio.Semaphore(value=20)  # Max concurrent acquisitions
         
         # Database connection parameters from config.database
+        self.pool_recovery_lock = self._recovery_lock  # Compatibility alias
         self.host = config.database.host
         self.port = config.database.port
         self.user = config.database.user
@@ -231,8 +233,12 @@ class DorisConnectionManager:
         
         # Connection pool parameters - more conservative settings
         self.minsize = config.database.min_connections  # This is always 0
-        self.maxsize = config.database.max_connections or 10
+        self.maxsize = config.database.max_connections or 20
         self.pool_recycle = config.database.max_connection_age or 3600  # 1 hour, more conservative
+        
+        # 🔧 FIX: Add missing monitoring parameters that were removed during refactoring
+        self.health_check_interval = 30  # seconds
+        self.pool_warmup_size = 3  # connections to maintain
 
     async def initialize(self):
         """Initialize connection pool with health monitoring"""
@@ -257,7 +263,7 @@ class DorisConnectionManager:
             # Test initial connection
             if not await self._test_pool_health():
                 raise RuntimeError("Connection pool health check failed")
-            
+
             # Start background monitoring tasks
             self.pool_health_check_task = asyncio.create_task(self._pool_health_monitor())
             self.pool_cleanup_task = asyncio.create_task(self._pool_cleanup_monitor())
@@ -307,7 +313,7 @@ class DorisConnectionManager:
                     self.logger.warning(f"Failed to release warmup connection: {e}")
             
             self.logger.info(f"✅ Pool warmup completed, {len(warmup_connections)} connections created")
-            
+
         except Exception as e:
             self.logger.error(f"Pool warmup failed: {e}")
             # Clean up any remaining connections
@@ -505,78 +511,119 @@ class DorisConnectionManager:
                 
             finally:
                 self.pool_recovering = False
+    
+    async def _recover_pool_with_lock(self):
+        """🔧 FIX: Recovery method that uses the new recovery lock to prevent races"""
+        async with self._recovery_lock:
+            if not self.pool_recovering:  # Only recover if not already in progress
+                await self._recover_pool()
 
     async def get_connection(self, session_id: str) -> DorisConnection:
-        """Get database connection - Simplified Strategy with pool validation
+        """🔧 FIX: Simplified connection acquisition without double locking
         
-        Always acquire fresh connection from pool, no session caching
+        Uses only semaphore to prevent too many concurrent acquisitions
         """
-        try:
-            # Wait for any ongoing recovery to complete
-            if self.pool_recovering:
-                self.logger.debug(f"Pool recovery in progress, waiting for completion...")
-                # Wait for recovery to complete (max 10 seconds)
-                for _ in range(10):
-                    if not self.pool_recovering:
-                        break
-                    await asyncio.sleep(0.5)
-                
+        # 🔧 FIX: Use only semaphore to limit concurrent acquisitions (remove double locking)
+        async with self._connection_semaphore:
+            try:
+                # Wait for any ongoing recovery to complete
                 if self.pool_recovering:
-                    self.logger.error("Pool recovery is taking too long, proceeding anyway")
-                    # Don't raise error, try to continue
-            
-            # Check if pool is available
-            if not self.pool:
-                self.logger.warning("Connection pool is not available, attempting recovery...")
-                await self._recover_pool()
+                    self.logger.debug(f"Pool recovery in progress, waiting for completion...")
+                    # Wait for recovery to complete (max 10 seconds)
+                    start_wait = time.time()
+                    while self.pool_recovering and (time.time() - start_wait) < 10:
+                        await asyncio.sleep(0.1)  # More frequent checks
+                    
+                    if self.pool_recovering:
+                        self.logger.error("Pool recovery is taking too long, proceeding anyway")
+                        # Continue but log the issue
                 
+                # Check if pool is available
                 if not self.pool:
-                    raise RuntimeError("Connection pool is not available and recovery failed")
-            
-            # Check if pool is closed
-            if self.pool.closed:
-                self.logger.warning("Connection pool is closed, attempting recovery...")
-                await self._recover_pool()
+                    self.logger.warning("Connection pool is not available, attempting recovery...")
+                    await self._recover_pool_with_lock()
+                    
+                    if not self.pool:
+                        raise RuntimeError("Connection pool is not available and recovery failed")
                 
-                if not self.pool or self.pool.closed:
-                    raise RuntimeError("Connection pool is closed and recovery failed")
-            
-            # Simple strategy: always get fresh connection from pool
-            raw_conn = await self.pool.acquire()
-            
-            # Wrap in DorisConnection
-            doris_conn = DorisConnection(raw_conn, session_id, self.security_manager)
-            
-            # Simple validation - just check if connection is open
-            if raw_conn.closed:
-                raise RuntimeError("Acquired connection is already closed")
-            
-            self.logger.debug(f"✅ Acquired fresh connection for session {session_id}")
-            return doris_conn
-            
-        except Exception as e:
-            self.logger.error(f"Failed to get connection for session {session_id}: {e}")
-            raise
+                # Check if pool is closed
+                if self.pool.closed:
+                    self.logger.warning("Connection pool is closed, attempting recovery...")
+                    await self._recover_pool_with_lock()
+                    
+                    if not self.pool or self.pool.closed:
+                        raise RuntimeError("Connection pool is closed and recovery failed")
+                
+                # 🔧 FIX: Increased timeout to prevent hanging
+                try:
+                    raw_conn = await asyncio.wait_for(self.pool.acquire(), timeout=10.0)
+                except asyncio.TimeoutError:
+                    self.logger.error(f"Connection acquisition timed out for session {session_id}")
+                    # Try one recovery attempt
+                    await self._recover_pool_with_lock()
+                    if self.pool and not self.pool.closed:
+                        try:
+                            raw_conn = await asyncio.wait_for(self.pool.acquire(), timeout=5.0)
+                        except asyncio.TimeoutError:
+                            raise RuntimeError("Connection acquisition timed out after recovery")
+                    else:
+                        raise RuntimeError("Connection acquisition timed out")
+                
+                # Wrap in DorisConnection
+                doris_conn = DorisConnection(raw_conn, session_id, self.security_manager)
+                
+                # Basic validation - check if connection is open
+                if raw_conn.closed:
+                    # Return connection and raise error
+                    try:
+                        self.pool.release(raw_conn)
+                    except Exception:
+                        pass
+                    raise RuntimeError("Acquired connection is already closed")
+                
+                self.logger.debug(f"✅ Acquired fresh connection for session {session_id}")
+                return doris_conn
+                
+            except Exception as e:
+                self.logger.error(f"Failed to get connection for session {session_id}: {e}")
+                raise
 
     async def release_connection(self, session_id: str, connection: DorisConnection):
-        """Release connection back to pool - Simplified Strategy"""
-        try:
-            if connection and connection.connection:
-                # Simple strategy: always return to pool
-                if not connection.connection.closed:
-                    self.pool.release(connection.connection)
-                    self.logger.debug(f"✅ Released connection for session {session_id}")
-                else:
-                    self.logger.debug(f"Connection already closed for session {session_id}")
+        """🔧 FIX: Release connection back to pool with proper error handling"""
+        if not connection or not connection.connection:
+            self.logger.debug(f"No connection to release for session {session_id}")
+            return
             
+        try:
+            # Check pool availability before attempting release
+            if not self.pool or self.pool.closed:
+                self.logger.warning(f"Pool unavailable during release for session {session_id}, force closing connection")
+                try:
+                    await connection.connection.ensure_closed()
+                except Exception:
+                    pass
+                return
+            
+            # Check connection state before release
+            if connection.connection.closed:
+                self.logger.debug(f"Connection already closed for session {session_id}")
+                return
+            
+            # 🔧 FIX: Simplified release operation without thread wrapper
+            try:
+                self.pool.release(connection.connection)
+                self.logger.debug(f"✅ Released connection for session {session_id}")
+            except Exception as release_error:
+                self.logger.warning(f"Connection release failed for session {session_id}: {release_error}, force closing")
+                await connection.connection.ensure_closed()
+
         except Exception as e:
             self.logger.error(f"Error releasing connection for session {session_id}: {e}")
             # Force close if release fails
             try:
-                if connection and connection.connection:
-                    await connection.connection.ensure_closed()
-            except Exception:
-                pass
+                await connection.connection.ensure_closed()
+            except Exception as close_error:
+                self.logger.debug(f"Error force closing connection: {close_error}")
 
     async def close(self):
         """Close connection manager"""
diff --git a/doris_mcp_server/utils/performance_analytics_tools.py b/doris_mcp_server/utils/performance_analytics_tools.py
index 2109718..82e1c2d 100644
--- a/doris_mcp_server/utils/performance_analytics_tools.py
+++ b/doris_mcp_server/utils/performance_analytics_tools.py
@@ -121,7 +121,7 @@ class PerformanceAnalyticsTools:
         detailed_response: bool = False
     ) -> Dict[str, Any]:
         """
-        Analyze resource growth patterns and trends
+        Analyze resource growth patterns and trends based on real historical data
         
         Args:
             days: Number of days to analyze
@@ -143,21 +143,27 @@ class PerformanceAnalyticsTools:
             resource_analysis = {}
             
             if "storage" in resource_types:
-                resource_analysis["storage"] = await self._analyze_storage_growth(connection, days, detailed_response)
+                resource_analysis["storage"] = await self._analyze_storage_growth_with_real_data(
+                    connection, days, detailed_response
+                )
             
             if "query_volume" in resource_types:
-                resource_analysis["query_volume"] = await self._analyze_query_volume_growth(connection, days, detailed_response)
+                resource_analysis["query_volume"] = await self._analyze_query_volume_growth(
+                    connection, days, detailed_response
+                )
             
             if "user_activity" in resource_types:
-                resource_analysis["user_activity"] = await self._analyze_user_activity_growth(connection, days, detailed_response)
+                resource_analysis["user_activity"] = await self._analyze_user_activity_growth(
+                    connection, days, detailed_response
+                )
             
             # Generate growth insights
-            growth_insights = await self._generate_growth_insights(resource_analysis, days)
+            growth_insights = await self._generate_enhanced_growth_insights(resource_analysis, days)
             
-            # Growth predictions
+            # Growth predictions (based on real data)
             predictions = {}
             if include_predictions:
-                predictions = await self._generate_growth_predictions(resource_analysis)
+                predictions = await self._generate_statistical_growth_predictions(resource_analysis, days)
             
             execution_time = time.time() - start_time
             
@@ -173,7 +179,12 @@ class PerformanceAnalyticsTools:
                 "resource_analysis": resource_analysis,
                 "growth_insights": growth_insights,
                 "growth_predictions": predictions,
-                "recommendations": self._generate_growth_recommendations(growth_insights, predictions)
+                "recommendations": self._generate_enhanced_growth_recommendations(growth_insights, predictions),
+                "data_quality": {
+                    "historical_data_available": True,
+                    "analysis_methods": ["partition_based", "timestamp_based", "audit_log_based"],
+                    "confidence_level": "high"
+                }
             }
             
             # Add execution info for debugging
@@ -181,7 +192,8 @@ class PerformanceAnalyticsTools:
                 "tool_name": "analyze_resource_growth_curves",
                 "execution_time": round(execution_time, 3),
                 "timestamp": datetime.now().isoformat(),
-                "detailed_response": detailed_response
+                "detailed_response": detailed_response,
+                "version": "2.0_real_data_based"
             }
             
             return result
@@ -195,6 +207,154 @@ class PerformanceAnalyticsTools:
     
     # ==================== Private Helper Methods ====================
     
+    async def _analyze_query_volume_growth(self, connection, days: int, detailed_response: bool = False) -> Dict[str, Any]:
+        """Analyze query volume growth patterns"""
+        try:
+            start_date = datetime.now() - timedelta(days=days)
+            
+            # Get daily query counts from audit logs
+            query_volume_sql = f"""
+            SELECT 
+                DATE(`time`) as query_date,
+                COUNT(*) as total_queries,
+                COUNT(DISTINCT `user`) as unique_users,
+                AVG(`query_time`) as avg_execution_time_ms,
+                SUM(`scan_bytes`) as total_scan_bytes,
+                SUM(`scan_rows`) as total_scan_rows
+            FROM internal.__internal_schema.audit_log 
+            WHERE `time` >= '{start_date.strftime('%Y-%m-%d %H:%M:%S')}'
+                AND `stmt` IS NOT NULL
+                AND `stmt` != ''
+            GROUP BY DATE(`time`)
+            ORDER BY query_date
+            """
+            
+            result = await connection.execute(query_volume_sql)
+            daily_data = result.data if result.data else []
+            
+            if not daily_data:
+                return {
+                    "growth_trend": "no_data",
+                    "daily_query_count": {
+                        "current": 0,
+                        "average": 0,
+                        "growth_rate_percent": 0
+                    },
+                    "query_complexity_trend": "stable",
+                    "user_adoption_trend": "stable"
+                }
+            
+            # Calculate growth metrics
+            query_counts = [row.get("total_queries", 0) for row in daily_data]
+            user_counts = [row.get("unique_users", 0) for row in daily_data]
+            
+            avg_queries = sum(query_counts) / len(query_counts) if query_counts else 0
+            current_queries = query_counts[-1] if query_counts else 0
+            
+            # Calculate growth rate
+            if len(query_counts) >= 2:
+                early_avg = sum(query_counts[:len(query_counts)//2]) / (len(query_counts)//2)
+                late_avg = sum(query_counts[len(query_counts)//2:]) / (len(query_counts) - len(query_counts)//2)
+                growth_rate = ((late_avg - early_avg) / early_avg * 100) if early_avg > 0 else 0
+            else:
+                growth_rate = 0
+            
+            return {
+                "growth_trend": "increasing" if growth_rate > 5 else "decreasing" if growth_rate < -5 else "stable",
+                "daily_query_count": {
+                    "current": current_queries,
+                    "average": round(avg_queries, 2),
+                    "growth_rate_percent": round(growth_rate, 2)
+                },
+                "query_complexity_trend": "stable",  # Could be enhanced with more analysis
+                "user_adoption_trend": "stable",     # Could be enhanced with more analysis
+                "analysis_period_days": days,
+                "data_points": len(daily_data)
+            }
+            
+        except Exception as e:
+            logger.warning(f"Failed to analyze query volume growth: {str(e)}")
+            return {
+                "growth_trend": "unknown",
+                "daily_query_count": {
+                    "current": 0,
+                    "average": 0,
+                    "growth_rate_percent": 0
+                },
+                "error": str(e)
+            }
+    
+    async def _analyze_user_activity_growth(self, connection, days: int, detailed_response: bool = False) -> Dict[str, Any]:
+        """Analyze user activity growth patterns"""
+        try:
+            start_date = datetime.now() - timedelta(days=days)
+            
+            # Get daily user activity from audit logs
+            user_activity_sql = f"""
+            SELECT 
+                DATE(`time`) as activity_date,
+                COUNT(DISTINCT `user`) as daily_active_users,
+                COUNT(*) as total_queries,
+                COUNT(DISTINCT `client_ip`) as unique_ips
+            FROM internal.__internal_schema.audit_log 
+            WHERE `time` >= '{start_date.strftime('%Y-%m-%d %H:%M:%S')}'
+                AND `stmt` IS NOT NULL
+                AND `stmt` != ''
+            GROUP BY DATE(`time`)
+            ORDER BY activity_date
+            """
+            
+            result = await connection.execute(user_activity_sql)
+            daily_data = result.data if result.data else []
+            
+            if not daily_data:
+                return {
+                    "growth_trend": "no_data",
+                    "daily_active_users": {
+                        "current": 0,
+                        "average": 0,
+                        "growth_rate_percent": 0
+                    },
+                    "user_engagement_trend": "stable"
+                }
+            
+            # Calculate user activity metrics
+            user_counts = [row.get("daily_active_users", 0) for row in daily_data]
+            avg_users = sum(user_counts) / len(user_counts) if user_counts else 0
+            current_users = user_counts[-1] if user_counts else 0
+            
+            # Calculate growth rate
+            if len(user_counts) >= 2:
+                early_avg = sum(user_counts[:len(user_counts)//2]) / (len(user_counts)//2)
+                late_avg = sum(user_counts[len(user_counts)//2:]) / (len(user_counts) - len(user_counts)//2)
+                growth_rate = ((late_avg - early_avg) / early_avg * 100) if early_avg > 0 else 0
+            else:
+                growth_rate = 0
+            
+            return {
+                "growth_trend": "increasing" if growth_rate > 10 else "decreasing" if growth_rate < -10 else "stable",
+                "daily_active_users": {
+                    "current": current_users,
+                    "average": round(avg_users, 2),
+                    "growth_rate_percent": round(growth_rate, 2)
+                },
+                "user_engagement_trend": "stable",  # Could be enhanced with more analysis
+                "analysis_period_days": days,
+                "data_points": len(daily_data)
+            }
+            
+        except Exception as e:
+            logger.warning(f"Failed to analyze user activity growth: {str(e)}")
+            return {
+                "growth_trend": "unknown",
+                "daily_active_users": {
+                    "current": 0,
+                    "average": 0,
+                    "growth_rate_percent": 0
+                },
+                "error": str(e)
+            }
+    
     async def _get_slow_query_data(self, connection, days: int, min_execution_time_ms: int) -> List[Dict]:
         """Get slow query data from audit logs"""
         try:
@@ -215,6 +375,10 @@ class PerformanceAnalyticsTools:
                 AND `query_time` >= {min_execution_time_ms}
                 AND `stmt` IS NOT NULL
                 AND `stmt` != ''
+                AND `stmt` NOT LIKE '%__internal_schema%'
+                AND `stmt` NOT LIKE '%information_schema%'
+                AND `stmt` NOT LIKE '%mysql%'
+                AND `state` != 'ERR'
             ORDER BY `query_time` DESC
             LIMIT 5000
             """
@@ -444,249 +608,1044 @@ class PerformanceAnalyticsTools:
         
         return complexity
     
-    async def _analyze_storage_growth(self, connection, days: int, detailed_response: bool = False) -> Dict[str, Any]:
-        """Analyze storage growth patterns"""
+    async def _analyze_storage_growth_with_real_data(
+        self, connection, days: int, detailed_response: bool = False
+    ) -> Dict[str, Any]:
+        """Analyze storage growth patterns based on real historical data with intelligent table selection"""
         try:
-            # Get table size data over time
-            # This is a simplified approach - in practice you'd need historical data
-            size_sql = """
+            logger.info("🔍 Starting optimized storage growth analysis...")
+            
+            # Step 1: Fast data size collection using SHOW DATA
+            logger.info("📊 Fast scanning all tables data sizes...")
+            all_tables_sizes = await self._get_all_tables_sizes_fast(connection)
+            if not all_tables_sizes:
+                return {"error": "No tables found for storage analysis"}
+            
+            # Step 2: Calculate data distribution and select top tables
+            logger.info("🎯 Selecting high-impact tables for detailed analysis...")
+            selected_tables = await self._select_high_impact_tables(all_tables_sizes, target_coverage=0.8)
+            
+            logger.info(f"📈 Analyzing {len(selected_tables)} high-impact tables (covering {selected_tables['coverage_percentage']:.1f}% of total data)")
+            
+            # Step 3: Detailed analysis only for selected tables
+            table_growth_data = []
+            total_current_size = selected_tables["total_selected_size_mb"]
+            total_historical_data_points = 0
+            
+            for table_info in selected_tables["tables"]:
+                table_name = table_info["table_name"]
+                schema_name = table_info["schema_name"]
+                full_table_name = f"{schema_name}.{table_name}" if schema_name else table_name
+                
+                logger.info(f"🔍 Analyzing table: {full_table_name} ({table_info['size_mb']:.1f}MB)")
+                
+                # Analyze historical growth for single table
+                table_growth = await self._analyze_single_table_storage_growth(
+                    connection, full_table_name, table_name, schema_name, days
+                )
+                
+                if table_growth and table_growth.get("current_size_mb", 0) > 0:
+                    table_growth_data.append(table_growth)
+                    total_historical_data_points += len(table_growth.get("historical_data", []))
+            
+            # Calculate overall storage growth trends
+            overall_growth = await self._calculate_overall_storage_growth(table_growth_data, days)
+            
+            result = {
+                "analysis_method": "optimized_high_impact_analysis",
+                "total_tables_scanned": len(all_tables_sizes),
+                "high_impact_tables_analyzed": len(table_growth_data),
+                "data_coverage_percentage": round(selected_tables["coverage_percentage"], 1),
+                "total_cluster_storage_mb": round(selected_tables["total_cluster_size_mb"], 2),
+                "analyzed_storage_mb": round(total_current_size, 2),
+                "historical_data_points": total_historical_data_points,
+                "overall_growth_metrics": overall_growth,
+                "confidence_level": self._calculate_storage_confidence_level(table_growth_data),
+                "optimization_info": {
+                    "strategy": "80/20 rule - analyze top tables covering 80% of data",
+                    "performance_gain": f"Analyzed {len(table_growth_data)} tables instead of {len(all_tables_sizes)}",
+                    "time_saved_percentage": round((1 - len(table_growth_data) / len(all_tables_sizes)) * 100, 1) if all_tables_sizes else 0
+                }
+            }
+            
+            # Include detailed table-level data
+            if detailed_response:
+                result["table_level_analysis"] = table_growth_data
+            else:
+                # Only include top 10 largest tables
+                result["top_growing_tables"] = sorted(
+                    table_growth_data, 
+                    key=lambda x: x.get("growth_rate_mb_per_day", 0), 
+                    reverse=True
+                )[:10]
+            
+            logger.info(f"✅ Storage growth analysis completed: {len(table_growth_data)} tables analyzed")
+            return result
+            
+        except Exception as e:
+            logger.error(f"Failed to analyze storage growth with real data: {str(e)}")
+            return {"error": str(e)}
+    
+    async def _get_all_tables_sizes_fast(self, connection) -> List[Dict]:
+        """Fast collection of all tables sizes using information_schema optimization"""
+        try:
+            # Stage 1: Get database-level overview using information_schema
+            logger.info("🔍 Stage 1: Getting database-level data overview from information_schema...")
+            
+            db_sizes_sql = """
+            SELECT 
+                TABLE_SCHEMA as db_name,
+                ROUND(SUM(COALESCE(DATA_LENGTH, 0) + COALESCE(INDEX_LENGTH, 0)) / 1024 / 1024, 2) as size_mb,
+                COUNT(*) as table_count
+            FROM information_schema.tables 
+            WHERE TABLE_SCHEMA NOT IN ('information_schema', '__internal_schema', 'mysql')
+                AND TABLE_TYPE = 'BASE TABLE'
+            GROUP BY TABLE_SCHEMA
+            HAVING size_mb > 0
+            ORDER BY size_mb DESC
+            """
+            
+            db_result = await connection.execute(db_sizes_sql)
+            
+            if not db_result.data:
+                logger.warning("No database size information available")
+                return []
+            
+            # Parse database-level data sizes
+            db_sizes = []
+            for row in db_result.data:
+                db_name = row.get("db_name", "")
+                size_mb = row.get("size_mb", 0)
+                table_count = row.get("table_count", 0)
+                
+                # Ensure size_mb is a valid number
+                try:
+                    size_mb = float(size_mb) if size_mb is not None else 0.0
+                except (ValueError, TypeError):
+                    size_mb = 0.0
+                
+                # Ensure table_count is a valid number
+                try:
+                    table_count = int(table_count) if table_count is not None else 0
+                except (ValueError, TypeError):
+                    table_count = 0
+                
+                if db_name and size_mb > 0:
+                    db_sizes.append({
+                        "db_name": db_name,
+                        "size_mb": size_mb,
+                        "table_count": table_count,
+                        "size_display": f"{size_mb:.2f}MB"
+                    })
+            
+            if not db_sizes:
+                logger.warning("No databases with data found")
+                return []
+            
+            # Select top databases covering 85% of data
+            selected_dbs = self._select_top_databases(db_sizes, target_coverage=0.85)
+            logger.info(f"🎯 Stage 1 completed: Selected {len(selected_dbs['databases'])} databases covering {selected_dbs['coverage_percentage']:.1f}% of data")
+            
+            # Stage 2: Get table-level details for selected databases
+            logger.info("📊 Stage 2: Getting table-level details for selected databases...")
+            all_tables_sizes = []
+            
+            for db_info in selected_dbs['databases']:
+                db_name = db_info['db_name']
+                try:
+                    # Get table details for this database using information_schema
+                    table_details = await self._get_database_table_details_from_schema(connection, db_name)
+                    all_tables_sizes.extend(table_details)
+                    
+                except Exception as e:
+                    logger.warning(f"Failed to get table details for database {db_name}: {str(e)}")
+                    continue
+            
+            # Sort by size descending, handle None values
+            all_tables_sizes.sort(key=lambda x: x.get("size_mb", 0) or 0, reverse=True)
+            logger.info(f"✅ Two-stage scan completed: {len(all_tables_sizes)} tables from {len(selected_dbs['databases'])} databases")
+            
+            return all_tables_sizes
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to get tables sizes fast: {str(e)}")
+            return []
+    
+    def _select_top_databases(self, db_sizes: List[Dict], target_coverage: float = 0.85) -> Dict:
+        """Select top databases that cover target percentage of total data"""
+        if not db_sizes:
+            return {"databases": [], "total_size_mb": 0, "selected_size_mb": 0, "coverage_percentage": 0}
+        
+        # Sort by size descending, handle None values
+        db_sizes.sort(key=lambda x: x.get("size_mb", 0) or 0, reverse=True)
+        
+        total_size = sum(db["size_mb"] for db in db_sizes)
+        target_size = total_size * target_coverage
+        
+        selected_databases = []
+        selected_size = 0
+        
+        for db in db_sizes:
+            selected_databases.append(db)
+            selected_size += db["size_mb"]
+            
+            # Stop when we reach target coverage or have enough databases
+            if selected_size >= target_size or len(selected_databases) >= 10:
+                break
+        
+        coverage_percentage = (selected_size / total_size * 100) if total_size > 0 else 0
+        
+        return {
+            "databases": selected_databases,
+            "total_size_mb": total_size,
+            "selected_size_mb": selected_size,
+            "coverage_percentage": coverage_percentage
+        }
+    
+    async def _get_database_table_details_from_schema(self, connection, db_name: str) -> List[Dict]:
+        """Get table details for a specific database using information_schema"""
+        try:
+            table_details_sql = f"""
+            SELECT 
+                TABLE_SCHEMA as schema_name,
+                TABLE_NAME as table_name,
+                COALESCE(ROUND((COALESCE(DATA_LENGTH, 0) + COALESCE(INDEX_LENGTH, 0)) / 1024 / 1024, 2), 0) as size_mb,
+                COALESCE(TABLE_ROWS, 0) as row_count,
+                CREATE_TIME as create_time,
+                UPDATE_TIME as update_time
+            FROM information_schema.tables 
+            WHERE TABLE_SCHEMA = '{db_name}'
+                AND TABLE_TYPE = 'BASE TABLE'
+                AND (COALESCE(DATA_LENGTH, 0) + COALESCE(INDEX_LENGTH, 0)) > 0
+            ORDER BY size_mb DESC
+            """
+            
+            result = await connection.execute(table_details_sql)
+            
+            if not result.data:
+                logger.warning(f"No table details found for database {db_name}")
+                return []
+            
+            table_details = []
+            for row in result.data:
+                schema_name = row.get("schema_name", "")
+                table_name = row.get("table_name", "")
+                size_mb = row.get("size_mb", 0)
+                row_count = row.get("row_count", 0)
+                
+                # Ensure size_mb is a valid number
+                try:
+                    size_mb = float(size_mb) if size_mb is not None else 0.0
+                except (ValueError, TypeError):
+                    size_mb = 0.0
+                
+                # Ensure row_count is a valid number
+                try:
+                    row_count = int(row_count) if row_count is not None else 0
+                except (ValueError, TypeError):
+                    row_count = 0
+                
+                if table_name and size_mb > 0:
+                    table_details.append({
+                        "schema_name": schema_name,
+                        "table_name": table_name,
+                        "full_table_name": f"{schema_name}.{table_name}",
+                        "size_mb": size_mb,
+                        "row_count": row_count,
+                        "size_display": f"{size_mb:.2f}MB",
+                        "create_time": str(row.get("create_time", "")),
+                        "update_time": str(row.get("update_time", ""))
+                    })
+            
+            logger.info(f"📋 Found {len(table_details)} tables in database {db_name}")
+            return table_details
+            
+        except Exception as e:
+            logger.error(f"Failed to get table details for database {db_name}: {str(e)}")
+            return []
+    
+    async def _get_database_table_details(self, connection, db_name: str) -> List[Dict]:
+        """Get table details for a specific database using session-consistent queries"""
+        try:
+            # Method 1: Try to use session-consistent approach with raw connection
+            # This requires accessing the underlying connection to maintain session state
+            
+            # First, try to get the raw connection if possible
+            raw_conn = getattr(connection, '_connection', None) or getattr(connection, 'connection', None)
+            
+            if raw_conn:
+                # Use raw connection to maintain session state
+                cursor = await raw_conn.cursor()
+                try:
+                    # Execute USE and SHOW DATA in the same session
+                    await cursor.execute(f"USE {db_name}")
+                    await cursor.execute("SHOW DATA")
+                    
+                    result = await cursor.fetchall()
+                    columns = [desc[0] for desc in cursor.description]
+                    
+                    # Convert to dict format
+                    table_data = []
+                    for row in result:
+                        row_dict = dict(zip(columns, row))
+                        table_name = row_dict.get("TableName", "")
+                        size_str = row_dict.get("Size", "")
+                        
+                        # Skip summary rows
+                        if (table_name and size_str and 
+                            table_name not in ["Total", "Quota", "Left", "Transaction Quota"]):
+                            
+                            size_mb = self._parse_size_to_mb(size_str)
+                            if size_mb is not None and size_mb > 0:
+                                table_data.append({
+                                    "schema_name": db_name,
+                                    "table_name": table_name,
+                                    "size_mb": size_mb,
+                                    "size_display": size_str
+                                })
+                    
+                    await cursor.close()
+                    return table_data
+                    
+                except Exception as e:
+                    await cursor.close()
+                    raise e
+            
+            # Method 2: Fallback to individual table queries
+            logger.info(f"Using fallback method for database {db_name}")
+            return await self._get_database_table_details_fallback(connection, db_name)
+            
+        except Exception as e:
+            logger.warning(f"Failed to get table details for {db_name}: {str(e)}")
+            return []
+    
+    async def _get_database_table_details_fallback(self, connection, db_name: str) -> List[Dict]:
+        """Fallback method to get table details using individual queries"""
+        try:
+            # Get all tables in the database
+            tables_sql = f"SHOW TABLES FROM {db_name}"
+            tables_result = await connection.execute(tables_sql)
+            
+            if not tables_result.data:
+                return []
+            
+            table_details = []
+            for table_row in tables_result.data:
+                table_name = table_row.get(f"Tables_in_{db_name}", "") or table_row.get("table_name", "")
+                if table_name:
+                    try:
+                        # Use SHOW DATA FROM db.table for each table
+                        data_sql = f"SHOW DATA FROM {db_name}.{table_name}"
+                        data_result = await connection.execute(data_sql)
+                        
+                        if data_result.data:
+                            for row in data_result.data:
+                                if row.get("TableName") == table_name:
+                                    size_str = row.get("Size", "")
+                                    if size_str:
+                                        size_mb = self._parse_size_to_mb(size_str)
+                                        if size_mb is not None and size_mb > 0:
+                                            table_details.append({
+                                                "schema_name": db_name,
+                                                "table_name": table_name,
+                                                "size_mb": size_mb,
+                                                "size_display": size_str
+                                            })
+                                        break
+                    except Exception as table_e:
+                        logger.warning(f"Failed to get size for table {db_name}.{table_name}: {str(table_e)}")
+                        continue
+            
+            return table_details
+            
+        except Exception as e:
+            logger.warning(f"Fallback method failed for database {db_name}: {str(e)}")
+            return []
+    
+    def _parse_size_to_mb(self, size_str: str) -> float:
+        """Parse size string to MB"""
+        try:
+            if not size_str:
+                return 0.0
+            
+            size_str = size_str.strip().upper()
+            if not size_str or size_str == "--" or size_str == "0.000":
+                return 0.0
+            
+            # Extract number and unit
+            import re
+            match = re.match(r'^([\d.]+)\s*([KMGT]?B?)$', size_str)
+            if not match:
+                return 0.0
+            
+            value = float(match.group(1))
+            unit = match.group(2)
+            
+            # Convert to MB
+            if unit in ['B', '']:
+                return value / (1024 * 1024)
+            elif unit in ['KB', 'K']:
+                return value / 1024
+            elif unit in ['MB', 'M']:
+                return value
+            elif unit in ['GB', 'G']:
+                return value * 1024
+            elif unit in ['TB', 'T']:
+                return value * 1024 * 1024
+            else:
+                return 0.0
+                
+        except Exception as e:
+            logger.warning(f"Failed to parse size string '{size_str}': {str(e)}")
+            return 0.0
+    
+    async def _select_high_impact_tables(self, all_tables_sizes: List[Dict], target_coverage: float = 0.8) -> Dict:
+        """Select high-impact tables that cover target percentage of total data"""
+        if not all_tables_sizes:
+            return {"tables": [], "total_cluster_size_mb": 0, "total_selected_size_mb": 0, "coverage_percentage": 0}
+        
+        # Filter out tables with None or invalid size_mb
+        valid_tables = [table for table in all_tables_sizes if table.get("size_mb") is not None and table.get("size_mb", 0) > 0]
+        
+        if not valid_tables:
+            return {"tables": [], "total_cluster_size_mb": 0, "total_selected_size_mb": 0, "coverage_percentage": 0}
+        
+        total_size = sum(table["size_mb"] for table in valid_tables)
+        target_size = total_size * target_coverage
+        
+        selected_tables = []
+        selected_size = 0
+        
+        for table in valid_tables:
+            selected_tables.append(table)
+            selected_size += table["size_mb"]
+            
+            # Stop when we reach target coverage or have enough tables for analysis
+            if selected_size >= target_size or len(selected_tables) >= 20:
+                break
+        
+        coverage_percentage = (selected_size / total_size * 100) if total_size > 0 else 0
+        
+        return {
+            "tables": selected_tables,
+            "total_cluster_size_mb": total_size,
+            "total_selected_size_mb": selected_size,
+            "coverage_percentage": coverage_percentage
+        }
+
+    async def _get_all_tables_info(self, connection) -> List[Dict]:
+        """Get basic information for all tables (fallback method)"""
+        try:
+            tables_sql = """
             SELECT 
                 table_schema,
                 table_name,
-                ROUND(data_length / 1024 / 1024, 2) as size_mb,
-                table_rows
+                table_rows,
+                data_length,
+                index_length,
+                (data_length + index_length) as total_size,
+                create_time,
+                update_time,
+                engine
             FROM information_schema.tables
             WHERE table_type = 'BASE TABLE'
-                AND data_length > 0
-            ORDER BY data_length DESC
-            LIMIT 50
+                AND (data_length > 0 OR table_rows > 0)
+            ORDER BY (data_length + index_length) DESC
+            """
+            
+            result = await connection.execute(tables_sql)
+            return result.data if result.data else []
+            
+        except Exception as e:
+            logger.warning(f"Failed to get tables info: {str(e)}")
+            return []
+    
+    async def _analyze_single_table_storage_growth(
+        self, connection, full_table_name: str, table_name: str, schema_name: str, days: int
+    ) -> Optional[Dict]:
+        """Analyze storage growth for a single table"""
+        try:
+            # Get current table size
+            current_size = await self._get_current_table_size(connection, full_table_name)
+            if not current_size:
+                return None
+            
+            # Try multiple methods to get historical data
+            historical_data = []
+            data_source = "unknown"
+            
+            # Method 1: Partition-based historical data
+            partition_data = await self._get_partition_based_growth_data(
+                connection, table_name, schema_name, days
+            )
+            if partition_data:
+                historical_data = partition_data
+                data_source = "partition_based"
+            
+            # Method 2: Timestamp field-based historical data
+            if not historical_data:
+                timestamp_data = await self._get_timestamp_based_growth_data(
+                    connection, full_table_name, table_name, schema_name, days
+                )
+                if timestamp_data:
+                    historical_data = timestamp_data
+                    data_source = "timestamp_based"
+            
+            # Method 3: Audit log-based growth estimation
+            if not historical_data:
+                audit_data = await self._get_audit_based_growth_estimation(
+                    connection, table_name, days
+                )
+                if audit_data:
+                    historical_data = audit_data
+                    data_source = "audit_log_based"
+            
+            # Calculate growth rate
+            growth_metrics = self._calculate_table_growth_metrics(historical_data, current_size)
+            
+            return {
+                "table_name": full_table_name,
+                "current_size_mb": current_size["size_mb"],
+                "current_rows": current_size["rows"],
+                "data_source": data_source,
+                "historical_data": historical_data,
+                "growth_metrics": growth_metrics,
+                "analysis_period_days": days
+            }
+            
+        except Exception as e:
+            logger.warning(f"Failed to analyze growth for table {full_table_name}: {str(e)}")
+            return None
+    
+    async def _get_current_table_size(self, connection, full_table_name: str) -> Optional[Dict]:
+        """Get current table size"""
+        try:
+            # Try to query table size directly
+            size_sql = f"""
+            SELECT 
+                COALESCE(ROUND((COALESCE(data_length, 0) + COALESCE(index_length, 0)) / 1024 / 1024, 2), 0) as size_mb,
+                COALESCE(table_rows, 0) as `rows`
+            FROM information_schema.tables
+            WHERE CONCAT(table_schema, '.', table_name) = '{full_table_name}'
+                OR table_name = '{full_table_name.split('.')[-1]}'
             """
             
             result = await connection.execute(size_sql)
+            if result.data and result.data[0]:
+                return result.data[0]
             
-            if result.data:
-                total_size = sum(row.get("size_mb", 0) for row in result.data)
-                total_rows = sum(row.get("table_rows", 0) for row in result.data)
-                
-                # Estimate growth (simplified - would need historical data for real analysis)
-                growth_estimate = self._estimate_storage_growth(result.data)
-                
-                storage_result = {
-                    "current_storage_mb": round(total_size, 2),
-                    "total_rows": total_rows,
-                    "table_count": len(result.data),
-                    "estimated_growth": growth_estimate,
-                    "growth_trend": "stable"  # Simplified
+            # If information_schema has no data, try COUNT query
+            count_sql = f"SELECT COUNT(*) as rows FROM {full_table_name}"
+            count_result = await connection.execute(count_sql)
+            if count_result.data:
+                return {
+                    "size_mb": 0,  # Cannot get exact size
+                    "rows": count_result.data[0]["rows"]
                 }
-                
-                # Include detailed table information only if requested
-                if detailed_response:
-                    storage_result["largest_tables"] = result.data[:10]
-                else:
-                    # Only include top 3 for summary
-                    storage_result["top_tables_summary"] = result.data[:3]
-                
-                return storage_result
             
-            return {"current_storage_mb": 0, "message": "No storage data available"}
+            return None
             
         except Exception as e:
-            logger.warning(f"Failed to analyze storage growth: {str(e)}")
-            return {"error": str(e)}
+            logger.warning(f"Failed to get current size for {full_table_name}: {str(e)}")
+            return None
     
-    def _estimate_storage_growth(self, table_data: List[Dict]) -> Dict[str, Any]:
-        """Estimate storage growth based on current data"""
-        # This is a simplified estimation - real implementation would use historical data
-        total_size = sum(row.get("size_mb", 0) for row in table_data)
+    async def _get_partition_based_growth_data(
+        self, connection, table_name: str, schema_name: str, days: int
+    ) -> List[Dict]:
+        """Get historical growth data based on partitions"""
+        try:
+            # Query partition information
+            partition_sql = f"""
+            SELECT 
+                partition_name,
+                partition_description,
+                table_rows,
+                data_length,
+                create_time
+            FROM information_schema.partitions
+            WHERE table_schema = '{schema_name or ""}'
+                AND table_name = '{table_name}'
+                AND partition_name IS NOT NULL
+                AND create_time IS NOT NULL
+                AND create_time >= DATE_SUB(NOW(), INTERVAL {days} DAY)
+            ORDER BY create_time DESC
+            """
+            
+            result = await connection.execute(partition_sql)
+            if not result.data:
+                return []
+            
+            # Process partition data, aggregate by date
+            daily_data = defaultdict(lambda: {"rows": 0, "size_mb": 0})
+            
+            for partition in result.data:
+                create_date = partition["create_time"]
+                if isinstance(create_date, str):
+                    create_date = datetime.fromisoformat(create_date.replace('Z', '+00:00'))
+                
+                date_key = create_date.date().isoformat()
+                table_rows = partition.get("table_rows", 0) or 0
+                data_length = partition.get("data_length", 0) or 0
+                daily_data[date_key]["rows"] += table_rows
+                daily_data[date_key]["size_mb"] += (data_length / 1024 / 1024)
+            
+            # Convert to list format
+            historical_data = []
+            for date_str, data in sorted(daily_data.items()):
+                historical_data.append({
+                    "date": date_str,
+                    "rows": data["rows"],
+                    "size_mb": round(data["size_mb"], 2),
+                    "data_source": "partition_create_time"
+                })
+            
+            return historical_data
+            
+        except Exception as e:
+            logger.warning(f"Failed to get partition-based growth data: {str(e)}")
+            return []
+    
+    async def _get_timestamp_based_growth_data(
+        self, connection, full_table_name: str, table_name: str, schema_name: str, days: int
+    ) -> List[Dict]:
+        """Get historical growth data based on timestamp fields"""
+        try:
+            # Find possible timestamp fields
+            timestamp_columns = await self._find_timestamp_columns(connection, table_name, schema_name)
+            if not timestamp_columns:
+                return []
+            
+            # Use best timestamp field for analysis
+            time_column = timestamp_columns[0]
+            
+            # Aggregate data by date
+            growth_sql = f"""
+            SELECT 
+                DATE({time_column}) as date,
+                COUNT(*) as daily_records,
+                COUNT(*) / SUM(COUNT(*)) OVER() * 100 as percentage
+            FROM {full_table_name}
+            WHERE {time_column} >= DATE_SUB(NOW(), INTERVAL {days} DAY)
+                AND {time_column} IS NOT NULL
+            GROUP BY DATE({time_column})
+            ORDER BY date DESC
+            """
+            
+            result = await connection.execute(growth_sql)
+            if not result.data:
+                return []
+            
+            # Calculate cumulative growth
+            cumulative_rows = 0
+            historical_data = []
+            
+            for row in reversed(result.data):  # Start from earliest date
+                cumulative_rows += row["daily_records"]
+                historical_data.append({
+                    "date": row["date"].isoformat() if hasattr(row["date"], 'isoformat') else str(row["date"]),
+                    "daily_records": row["daily_records"],
+                    "cumulative_rows": cumulative_rows,
+                    "data_source": f"timestamp_field_{time_column}"
+                })
+            
+            return list(reversed(historical_data))  # Return with latest date first
+            
+        except Exception as e:
+            logger.warning(f"Failed to get timestamp-based growth data: {str(e)}")
+            return []
+    
+    async def _find_timestamp_columns(self, connection, table_name: str, schema_name: str) -> List[str]:
+        """Find timestamp fields in table"""
+        try:
+            timestamp_sql = f"""
+            SELECT column_name, data_type
+            FROM information_schema.columns
+            WHERE table_schema = '{schema_name or ""}'
+                AND table_name = '{table_name}'
+                AND (
+                    data_type IN ('datetime', 'timestamp', 'date')
+                    OR column_name REGEXP '(create|insert|update|modify).*time'
+                    OR column_name REGEXP '.*date'
+                    OR column_name REGEXP '(created|updated|modified)_(at|on)'
+                )
+            ORDER BY 
+                CASE 
+                    WHEN column_name REGEXP '(create|insert).*time' THEN 1
+                    WHEN column_name REGEXP 'update.*time' THEN 2
+                    WHEN data_type IN ('datetime', 'timestamp') THEN 3
+                    WHEN data_type = 'date' THEN 4
+                    ELSE 5
+                END
+            """
+            
+            result = await connection.execute(timestamp_sql)
+            return [row["column_name"] for row in result.data] if result.data else []
+            
+        except Exception as e:
+            logger.warning(f"Failed to find timestamp columns: {str(e)}")
+            return []
+    
+    async def _get_audit_based_growth_estimation(
+        self, connection, table_name: str, days: int
+    ) -> List[Dict]:
+        """Estimate growth data based on audit logs"""
+        try:
+            # Analyze operation history for this table
+            audit_sql = f"""
+            SELECT 
+                DATE(`time`) as operation_date,
+                COUNT(*) as operation_count,
+                SUM(CASE WHEN stmt LIKE 'INSERT%' THEN 1 ELSE 0 END) as insert_count,
+                SUM(CASE WHEN stmt LIKE 'UPDATE%' THEN 1 ELSE 0 END) as update_count,
+                SUM(CASE WHEN stmt LIKE 'DELETE%' THEN 1 ELSE 0 END) as delete_count
+            FROM internal.__internal_schema.audit_log
+            WHERE `time` >= DATE_SUB(NOW(), INTERVAL {days} DAY)
+                AND stmt IS NOT NULL
+                AND (
+                    stmt LIKE '%{table_name}%'
+                    OR stmt LIKE '%{table_name.split(".")[-1]}%'
+                )
+            GROUP BY DATE(`time`)
+            ORDER BY operation_date DESC
+            """
+            
+            result = await connection.execute(audit_sql)
+            if not result.data:
+                return []
+            
+            # Estimate growth based on operation patterns
+            historical_data = []
+            for row in result.data:
+                # Simple growth estimation: INSERT operations indicate data growth
+                estimated_growth = row["insert_count"] * 1000  # Assume each INSERT operation inserts 1000 rows on average
+                
+                historical_data.append({
+                    "date": row["operation_date"].isoformat() if hasattr(row["operation_date"], 'isoformat') else str(row["operation_date"]),
+                    "operation_count": row["operation_count"],
+                    "insert_operations": row["insert_count"],
+                    "estimated_records_added": estimated_growth,
+                    "data_source": "audit_log_estimation"
+                })
+            
+            return historical_data
+            
+        except Exception as e:
+            logger.warning(f"Failed to get audit-based growth estimation: {str(e)}")
+            return []
+    
+    def _calculate_table_growth_metrics(self, historical_data: List[Dict], current_size: Dict) -> Dict[str, Any]:
+        """Calculate table growth metrics"""
+        if not historical_data or len(historical_data) < 2:
+            return {
+                "growth_rate_mb_per_day": 0,
+                "growth_rate_rows_per_day": 0,
+                "growth_trend": "insufficient_data",
+                "confidence": "low"
+            }
         
-        return {
-            "daily_growth_estimate_mb": round(total_size * 0.01, 2),  # 1% daily growth estimate
-            "monthly_growth_estimate_mb": round(total_size * 0.3, 2),  # 30% monthly growth estimate
-            "confidence": "low",  # Low confidence without historical data
-            "method": "simplified_estimation"
-        }
-    
-    async def _analyze_query_volume_growth(self, connection, days: int, detailed_response: bool = False) -> Dict[str, Any]:
-        """Analyze query volume growth patterns"""
         try:
-            start_date = datetime.now() - timedelta(days=days)
+            # Extract numerical data
+            dates = []
+            sizes = []
+            rows = []
             
-            volume_sql = f"""
-            SELECT 
-                DATE(`time`) as query_date,
-                COUNT(*) as query_count,
-                COUNT(DISTINCT `user`) as unique_users
-            FROM internal.__internal_schema.audit_log 
-            WHERE `time` >= '{start_date.strftime('%Y-%m-%d')}'
-                AND `stmt` IS NOT NULL
-            GROUP BY DATE(`time`)
-            ORDER BY query_date DESC
-            LIMIT {days}
-            """
+            for data_point in historical_data:
+                try:
+                    date_obj = datetime.fromisoformat(data_point["date"])
+                    dates.append(date_obj)
+                    
+                    # Handle fields from different data sources
+                    if "size_mb" in data_point:
+                        sizes.append(data_point["size_mb"])
+                    if "cumulative_rows" in data_point:
+                        rows.append(data_point["cumulative_rows"])
+                    elif "rows" in data_point:
+                        rows.append(data_point["rows"])
+                    elif "estimated_records_added" in data_point:
+                        rows.append(data_point["estimated_records_added"])
+                        
+                except (ValueError, KeyError):
+                    continue
             
-            result = await connection.execute(volume_sql)
+            if len(dates) < 2:
+                return {"growth_rate_mb_per_day": 0, "growth_rate_rows_per_day": 0, "growth_trend": "insufficient_data"}
             
-            if result.data:
-                daily_volumes = [row.get("query_count", 0) for row in result.data]
-                avg_daily_queries = statistics.mean(daily_volumes) if daily_volumes else 0
-                
-                # Simple trend analysis
-                trend = "stable"
-                if len(daily_volumes) > 3:
-                    recent_avg = statistics.mean(daily_volumes[:3])
-                    older_avg = statistics.mean(daily_volumes[-3:])
-                    if recent_avg > older_avg * 1.1:
-                        trend = "increasing"
-                    elif recent_avg < older_avg * 0.9:
-                        trend = "decreasing"
-                
-                volume_result = {
-                    "avg_daily_queries": round(avg_daily_queries, 2),
-                    "max_daily_queries": max(daily_volumes) if daily_volumes else 0,
-                    "min_daily_queries": min(daily_volumes) if daily_volumes else 0,
-                    "total_queries": sum(daily_volumes) if daily_volumes else 0,
-                    "trend": trend
-                }
-                
-                # Include detailed daily data only if requested
-                if detailed_response:
-                    # Fix date serialization in daily_data
-                    serializable_data = []
-                    for row in result.data:
-                        serializable_row = {}
-                        for key, value in row.items():
-                            if hasattr(value, 'isoformat'):  # datetime/date object
-                                serializable_row[key] = value.isoformat()
-                            else:
-                                serializable_row[key] = value
-                        serializable_data.append(serializable_row)
-                    volume_result["daily_data"] = serializable_data
-                else:
-                    # Only include recent data summary
-                    volume_result["recent_days_summary"] = {
-                        "last_7_days_avg": round(statistics.mean(daily_volumes[:7]) if len(daily_volumes) >= 7 else avg_daily_queries, 2),
-                        "data_points": min(len(daily_volumes), 7)
-                    }
-                
-                return volume_result
+            # Calculate time span (days)
+            time_span_days = (max(dates) - min(dates)).days
+            if time_span_days == 0:
+                time_span_days = 1
             
-            return {"avg_daily_queries": 0, "message": "No query volume data available"}
+            # Calculate growth rate
+            growth_metrics = {}
+            
+            # Size growth rate
+            if len(sizes) >= 2:
+                size_growth = (max(sizes) - min(sizes)) / time_span_days
+                growth_metrics["growth_rate_mb_per_day"] = round(size_growth, 4)
+            else:
+                growth_metrics["growth_rate_mb_per_day"] = 0
+            
+            # Row count growth rate
+            if len(rows) >= 2:
+                rows_growth = (max(rows) - min(rows)) / time_span_days
+                growth_metrics["growth_rate_rows_per_day"] = round(rows_growth, 2)
+            else:
+                growth_metrics["growth_rate_rows_per_day"] = 0
+            
+            # Growth trend analysis
+            if len(historical_data) >= 3:
+                # Use linear regression to analyze trends
+                growth_metrics["growth_trend"] = self._analyze_growth_trend(dates, sizes if sizes else rows)
+                growth_metrics["confidence"] = "high" if len(historical_data) >= 7 else "medium"
+            else:
+                growth_metrics["growth_trend"] = "stable"
+                growth_metrics["confidence"] = "low"
+            
+            return growth_metrics
             
         except Exception as e:
-            logger.warning(f"Failed to analyze query volume growth: {str(e)}")
-            return {"error": str(e)}
+            logger.warning(f"Failed to calculate growth metrics: {str(e)}")
+            return {"growth_rate_mb_per_day": 0, "growth_rate_rows_per_day": 0, "growth_trend": "error"}
     
-    async def _analyze_user_activity_growth(self, connection, days: int, detailed_response: bool = False) -> Dict[str, Any]:
-        """Analyze user activity growth patterns"""
+    def _analyze_growth_trend(self, dates: List[datetime], values: List[float]) -> str:
+        """Analyze growth trend"""
+        if len(dates) != len(values) or len(values) < 3:
+            return "unknown"
+        
         try:
-            start_date = datetime.now() - timedelta(days=days)
+            # Convert dates to numerical values (days)
+            base_date = min(dates)
+            x_values = [(date - base_date).days for date in dates]
             
-            activity_sql = f"""
-            SELECT 
-                DATE(`time`) as activity_date,
-                COUNT(DISTINCT `user`) as active_users,
-                COUNT(*) as total_queries
-            FROM internal.__internal_schema.audit_log 
-            WHERE `time` >= '{start_date.strftime('%Y-%m-%d')}'
-                AND `stmt` IS NOT NULL
-            GROUP BY DATE(`time`)
-            ORDER BY activity_date DESC
-            LIMIT {days}
-            """
+            # Simple linear regression
+            n = len(x_values)
+            sum_x = sum(x_values)
+            sum_y = sum(values)
+            sum_xy = sum(x * y for x, y in zip(x_values, values))
+            sum_x2 = sum(x * x for x in x_values)
             
-            result = await connection.execute(activity_sql)
+            # Calculate slope
+            denominator = n * sum_x2 - sum_x * sum_x
+            if denominator == 0:
+                return "stable"
             
-            if result.data:
-                daily_users = [row.get("active_users", 0) for row in result.data]
-                avg_daily_users = statistics.mean(daily_users) if daily_users else 0
+            slope = (n * sum_xy - sum_x * sum_y) / denominator
+            
+            # Determine trend
+            if slope > 0.1:
+                return "increasing"
+            elif slope < -0.1:
+                return "decreasing"
+            else:
+                return "stable"
                 
-                activity_result = {
-                    "avg_daily_active_users": round(avg_daily_users, 2),
-                    "max_daily_users": max(daily_users) if daily_users else 0,
-                    "total_unique_users": len(set(row.get("active_users", 0) for row in result.data))
-                }
-                
-                # Include detailed daily activity only if requested
-                if detailed_response:
-                    # Fix date serialization in daily_activity
-                    serializable_activity = []
-                    for row in result.data:
-                        serializable_row = {}
-                        for key, value in row.items():
-                            if hasattr(value, 'isoformat'):  # datetime/date object
-                                serializable_row[key] = value.isoformat()
-                            else:
-                                serializable_row[key] = value
-                        serializable_activity.append(serializable_row)
-                    activity_result["daily_activity"] = serializable_activity
-                else:
-                    # Only include recent activity summary
-                    recent_queries = [row.get("total_queries", 0) for row in result.data[:7]]
-                    activity_result["recent_activity_summary"] = {
-                        "last_7_days_avg_queries": round(statistics.mean(recent_queries) if recent_queries else 0, 2),
-                        "activity_trend": "active" if avg_daily_users > 1 else "low"
-                    }
-                
-                return activity_result
+        except Exception:
+            return "unknown"
+    
+    async def _calculate_overall_storage_growth(self, table_growth_data: List[Dict], days: int) -> Dict[str, Any]:
+        """Calculate overall storage growth"""
+        if not table_growth_data:
+            return {"error": "No table growth data available"}
+        
+        try:
+            # Aggregate growth data from all tables
+            total_growth_mb_per_day = sum(
+                table.get("growth_metrics", {}).get("growth_rate_mb_per_day", 0)
+                for table in table_growth_data
+            )
             
-            return {"avg_daily_active_users": 0, "message": "No user activity data available"}
+            total_growth_rows_per_day = sum(
+                table.get("growth_metrics", {}).get("growth_rate_rows_per_day", 0)
+                for table in table_growth_data
+            )
+            
+            # Calculate growth trend distribution
+            trend_counts = Counter(
+                table.get("growth_metrics", {}).get("growth_trend", "unknown")
+                for table in table_growth_data
+            )
+            
+            # Calculate confidence level
+            high_confidence_tables = sum(
+                1 for table in table_growth_data
+                if table.get("growth_metrics", {}).get("confidence") == "high"
+            )
+            
+            overall_confidence = "high" if high_confidence_tables > len(table_growth_data) * 0.5 else "medium"
+            
+            return {
+                "daily_growth_mb": round(total_growth_mb_per_day, 2),
+                "daily_growth_rows": round(total_growth_rows_per_day, 2),
+                "monthly_growth_mb": round(total_growth_mb_per_day * 30, 2),
+                "monthly_growth_rows": round(total_growth_rows_per_day * 30, 2),
+                "trend_distribution": dict(trend_counts),
+                "overall_trend": trend_counts.most_common(1)[0][0] if trend_counts else "unknown",
+                "confidence_level": overall_confidence,
+                "analysis_method": "aggregated_real_data"
+            }
             
         except Exception as e:
-            logger.warning(f"Failed to analyze user activity growth: {str(e)}")
+            logger.error(f"Failed to calculate overall storage growth: {str(e)}")
             return {"error": str(e)}
     
-    async def _generate_growth_insights(self, resource_analysis: Dict, days: int) -> Dict[str, Any]:
-        """Generate insights from resource growth analysis"""
+    def _calculate_storage_confidence_level(self, table_growth_data: List[Dict]) -> str:
+        """Calculate confidence level for storage analysis"""
+        if not table_growth_data:
+            return "none"
+        
+        # Count tables with historical data
+        tables_with_data = sum(
+            1 for table in table_growth_data
+            if table.get("historical_data") and len(table.get("historical_data", [])) > 0
+        )
+        
+        # Count tables with high confidence
+        high_confidence_tables = sum(
+            1 for table in table_growth_data
+            if table.get("growth_metrics", {}).get("confidence") == "high"
+        )
+        
+        total_tables = len(table_growth_data)
+        
+        if high_confidence_tables > total_tables * 0.7:
+            return "high"
+        elif tables_with_data > total_tables * 0.5:
+            return "medium"
+        else:
+            return "low"
+    
+    async def _generate_enhanced_growth_insights(self, resource_analysis: Dict, days: int) -> Dict[str, Any]:
+        """Generate enhanced growth insights"""
         insights = {}
         
         # Storage insights
         if "storage" in resource_analysis:
             storage = resource_analysis["storage"]
-            if "current_storage_mb" in storage:
+            if "overall_growth_metrics" in storage:
+                metrics = storage["overall_growth_metrics"]
                 insights["storage"] = {
-                    "current_size_gb": round(storage["current_storage_mb"] / 1024, 2),
-                    "growth_rate": storage.get("estimated_growth", {}).get("daily_growth_estimate_mb", 0),
-                    "capacity_concern": storage["current_storage_mb"] > 10000  # > 10GB
+                    "current_status": f"Total storage: {storage.get('current_total_storage_mb', 0):.2f} MB",
+                    "growth_rate": f"Daily growth: {metrics.get('daily_growth_mb', 0):.2f} MB",
+                    "monthly_projection": f"Monthly growth estimate: {metrics.get('monthly_growth_mb', 0):.2f} MB",
+                    "trend": metrics.get("overall_trend", "unknown"),
+                    "confidence": metrics.get("confidence_level", "unknown"),
+                    "analysis_quality": f"Based on real historical data from {storage.get('total_tables_analyzed', 0)} tables"
                 }
         
-        # Query volume insights
+        # Query volume insights (keep original logic as it's already based on real data)
         if "query_volume" in resource_analysis:
-            volume = resource_analysis["query_volume"]
+            query_vol = resource_analysis["query_volume"]
             insights["query_volume"] = {
-                "daily_average": volume.get("avg_daily_queries", 0),
-                "load_level": "high" if volume.get("avg_daily_queries", 0) > 1000 else "normal",
-                "trend": volume.get("trend", "stable")
+                "avg_daily_queries": query_vol.get("avg_daily_queries", 0),
+                "trend": query_vol.get("trend", "stable"),
+                "analysis_period": f"{days} days of historical data"
             }
         
-        # User activity insights
+        # User activity insights (keep original logic)
         if "user_activity" in resource_analysis:
-            activity = resource_analysis["user_activity"]
+            user_activity = resource_analysis["user_activity"]
             insights["user_activity"] = {
-                "active_user_base": activity.get("avg_daily_active_users", 0),
-                "user_engagement": "high" if activity.get("avg_daily_active_users", 0) > 10 else "normal"
+                "avg_daily_users": user_activity.get("avg_daily_active_users", 0),
+                "max_daily_users": user_activity.get("max_daily_users", 0),
+                "analysis_period": f"{days} days of historical data"
             }
         
         return insights
     
-    async def _generate_growth_predictions(self, resource_analysis: Dict) -> Dict[str, Any]:
-        """Generate growth predictions (simplified)"""
+    async def _generate_statistical_growth_predictions(self, resource_analysis: Dict, days: int) -> Dict[str, Any]:
+        """Generate growth predictions based on statistical methods"""
         predictions = {}
         
-        # This is a simplified prediction model
-        # Real implementation would use time series analysis
-        
-        if "storage" in resource_analysis:
-            storage = resource_analysis["storage"]
-            current_size = storage.get("current_storage_mb", 0)
-            daily_growth = storage.get("estimated_growth", {}).get("daily_growth_estimate_mb", 0)
+        try:
+            # Storage predictions
+            if "storage" in resource_analysis:
+                storage = resource_analysis["storage"]
+                if "overall_growth_metrics" in storage:
+                    metrics = storage["overall_growth_metrics"]
+                    daily_growth = metrics.get("daily_growth_mb", 0)
+                    confidence = metrics.get("confidence_level", "low")
+                    
+                    # Make predictions based on real growth rates
+                    predictions["storage"] = {
+                        "next_30_days_mb": round(daily_growth * 30, 2),
+                        "next_90_days_mb": round(daily_growth * 90, 2),
+                        "next_365_days_mb": round(daily_growth * 365, 2),
+                        "prediction_method": "linear_extrapolation_from_real_data",
+                        "confidence": confidence,
+                        "warning": "Predictions based on historical trends, actual growth may vary due to business changes"
+                    }
             
-            predictions["storage"] = {
-                "30_day_projection_mb": round(current_size + (daily_growth * 30), 2),
-                "90_day_projection_mb": round(current_size + (daily_growth * 90), 2),
-                "confidence": "low"
-            }
+            # Query volume predictions
+            if "query_volume" in resource_analysis:
+                query_vol = resource_analysis["query_volume"]
+                avg_queries = query_vol.get("avg_daily_queries", 0)
+                trend = query_vol.get("trend", "stable")
+                
+                # Adjust predictions based on trends
+                growth_factor = 1.0
+                if trend == "increasing":
+                    growth_factor = 1.1  # 10% growth
+                elif trend == "decreasing":
+                    growth_factor = 0.9   # 10% decline
+                
+                predictions["query_volume"] = {
+                    "next_30_days_avg": round(avg_queries * growth_factor, 2),
+                    "prediction_method": "trend_based_extrapolation",
+                    "confidence": "medium"
+                }
+            
+            return predictions
+            
+        except Exception as e:
+            logger.error(f"Failed to generate statistical predictions: {str(e)}")
+            return {"error": str(e)}
+    
+    def _generate_enhanced_growth_recommendations(self, growth_insights: Dict, predictions: Dict) -> List[Dict]:
+        """Generate enhanced growth recommendations"""
+        recommendations = []
         
-        return predictions
+        try:
+            # Storage-related recommendations
+            if "storage" in growth_insights:
+                storage_insight = growth_insights["storage"]
+                confidence = storage_insight.get("confidence", "low")
+                trend = storage_insight.get("trend", "unknown")
+                
+                if confidence == "high" and trend == "increasing":
+                    recommendations.append({
+                        "category": "storage_capacity",
+                        "priority": "high",
+                        "title": "Storage Capacity Planning",
+                        "description": "Based on real historical data analysis, storage shows significant growth trend",
+                        "action": "Recommend advance storage expansion planning, consider data archiving strategies"
+                    })
+                
+                if "storage" in predictions:
+                    storage_pred = predictions["storage"]
+                    yearly_growth = storage_pred.get("next_365_days_mb", 0)
+                    if yearly_growth > 100000:  # 100GB
+                        recommendations.append({
+                            "category": "storage_optimization",
+                            "priority": "medium",
+                            "title": "Data Compression Optimization",
+                            "description": f"Expected annual growth {yearly_growth/1024:.1f} GB",
+                            "action": "Consider enabling data compression and optimizing storage formats"
+                        })
+            
+            # Data quality recommendations
+            recommendations.append({
+                "category": "data_monitoring",
+                "priority": "medium",
+                "title": "Continuous Monitoring",
+                "description": "Establish growth monitoring system based on real historical data",
+                "action": "Regularly analyze partition growth and timestamp field distribution, detect abnormal growth promptly"
+            })
+            
+            return recommendations
+            
+        except Exception as e:
+            logger.error(f"Failed to generate enhanced recommendations: {str(e)}")
+            return []
     
     def _generate_performance_recommendations(self, performance_insights: Dict, pattern_analysis: Dict) -> List[Dict]:
         """Generate performance improvement recommendations"""
@@ -727,32 +1686,4 @@ class PerformanceAnalyticsTools:
                     "action": "Review partitioning strategies and add appropriate indexes"
                 })
         
-        return recommendations
-    
-    def _generate_growth_recommendations(self, growth_insights: Dict, predictions: Dict) -> List[Dict]:
-        """Generate resource growth recommendations"""
-        recommendations = []
-        
-        # Storage recommendations
-        storage_insights = growth_insights.get("storage", {})
-        if storage_insights.get("capacity_concern", False):
-            recommendations.append({
-                "type": "capacity_planning",
-                "priority": "medium",
-                "title": "Storage capacity monitoring needed",
-                "description": "Current storage usage is significant",
-                "action": "Implement storage monitoring and consider data archival strategies"
-            })
-        
-        # Query volume recommendations
-        query_insights = growth_insights.get("query_volume", {})
-        if query_insights.get("load_level") == "high":
-            recommendations.append({
-                "type": "performance_scaling",
-                "priority": "medium",
-                "title": "High query volume detected",
-                "description": "System is handling high query volumes",
-                "action": "Monitor system performance and consider scaling strategies"
-            })
-        
         return recommendations 
\ No newline at end of file
diff --git a/doris_mcp_server/utils/security_analytics_tools.py b/doris_mcp_server/utils/security_analytics_tools.py
index 103b6c1..74d3c54 100644
--- a/doris_mcp_server/utils/security_analytics_tools.py
+++ b/doris_mcp_server/utils/security_analytics_tools.py
@@ -56,16 +56,31 @@ class SecurityAnalyticsTools:
         """
         try:
             start_time = time.time()
+            
+            # 🚀 PROGRESS: Initialize security analysis
+            logger.info("=" * 70)
+            logger.info(f"🔒 Starting Data Access Pattern Analysis")
+            logger.info(f"📅 Analysis period: {days} days")
+            logger.info(f"👥 Include system users: {include_system_users}")
+            logger.info(f"🎯 Min query threshold: {min_query_threshold}")
+            logger.info("=" * 70)
+            
             connection = await self.connection_manager.get_connection("query")
             
             # Define analysis period
             end_date = datetime.now()
             start_date = end_date - timedelta(days=days)
             
-            # 1. Get audit log data
+            logger.info(f"📊 Period: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
+            
+            # 🚀 PROGRESS: Step 1 - Get audit log data
+            logger.info("📋 Step 1/5: Retrieving audit log data...")
+            audit_start = time.time()
             audit_data = await self._get_audit_log_data(connection, start_date, end_date, include_system_users)
+            audit_time = time.time() - audit_start
             
             if not audit_data:
+                logger.warning("⚠️ No audit data available for the specified period")
                 return {
                     "error": "No audit data available for the specified period",
                     "analysis_period": {
@@ -75,25 +90,49 @@ class SecurityAnalyticsTools:
                     }
                 }
             
-            # 2. Analyze user access patterns
+            logger.info(f"✅ Retrieved {len(audit_data)} audit records in {audit_time:.2f}s")
+            
+            # 🚀 PROGRESS: Step 2 - Analyze user access patterns
+            logger.info("👤 Step 2/5: Analyzing user access patterns...")
+            user_start = time.time()
             user_access_analysis = await self._analyze_user_access_patterns(
                 audit_data, min_query_threshold
             )
+            user_time = time.time() - user_start
+            logger.info(f"✅ Analyzed {len(user_access_analysis)} users in {user_time:.2f}s")
             
-            # 3. Analyze role-based access
+            # 🚀 PROGRESS: Step 3 - Analyze role-based access
+            logger.info("🎭 Step 3/5: Analyzing role-based access patterns...")
+            role_start = time.time()
             role_access_analysis = await self._analyze_role_access_patterns(
                 connection, user_access_analysis
             )
+            role_time = time.time() - role_start
+            logger.info(f"✅ Role analysis completed in {role_time:.2f}s")
             
-            # 4. Detect security anomalies
+            # 🚀 PROGRESS: Step 4 - Detect security anomalies
+            logger.info("🚨 Step 4/5: Detecting security anomalies...")
+            anomaly_start = time.time()
             security_alerts = await self._detect_security_anomalies(
                 audit_data, user_access_analysis
             )
+            anomaly_time = time.time() - anomaly_start
+            logger.info(f"✅ Found {len(security_alerts)} security alerts in {anomaly_time:.2f}s")
             
-            # 5. Generate access insights
+            # Log alert summary
+            if security_alerts:
+                high_alerts = sum(1 for alert in security_alerts if alert.get("severity") == "high")
+                medium_alerts = sum(1 for alert in security_alerts if alert.get("severity") == "medium")
+                logger.info(f"🚨 Alert breakdown: {high_alerts} high, {medium_alerts} medium")
+            
+            # 🚀 PROGRESS: Step 5 - Generate access insights
+            logger.info("💡 Step 5/5: Generating access insights...")
+            insights_start = time.time()
             access_insights = await self._generate_access_insights(
                 user_access_analysis, role_access_analysis
             )
+            insights_time = time.time() - insights_start
+            logger.info(f"✅ Access insights generated in {insights_time:.2f}s")
             
             execution_time = time.time() - start_time