diff --git a/.env.example b/.env.example index 03f9e47..039be0c 100644 --- a/.env.example +++ b/.env.example @@ -1,89 +1,168 @@ -# Doris MCP Server Configuration -# Copy this file to .env and modify the values according to your environment +# =================================================================== +# Doris MCP Server Environment Configuration Example +# =================================================================== +# Copy this file to .env and modify the configuration values as needed -# ============================================================================= -# Database Configuration -# ============================================================================= +# =================================================================== +# Database Connection Configuration +# =================================================================== -# Doris FE connection settings +# Doris FE (Frontend) connection settings DORIS_HOST=localhost DORIS_PORT=9030 DORIS_USER=root DORIS_PASSWORD= DORIS_DATABASE=information_schema -# Doris FE HTTP API port +# Doris FE HTTP API port (for Profile and other HTTP APIs) DORIS_FE_HTTP_PORT=8030 -# BE nodes configuration for external access -# If DORIS_BE_HOSTS is empty, will use "show backends" to get BE nodes automatically -# Format: comma-separated list of BE host addresses -# Example: DORIS_BE_HOSTS=192.168.1.100,192.168.1.101,192.168.1.102 +# Doris BE (Backend) nodes configuration (optional, for external access) +# Format: host1,host2,host3 (if empty, will use "show backends" to get BE nodes) DORIS_BE_HOSTS= - -# BE webserver port for HTTP APIs (memory tracker, metrics, etc.) DORIS_BE_WEBSERVER_PORT=8040 -# ============================================================================= -# Connection Pool Configuration -# ============================================================================= - +# Connection pool configuration DORIS_MAX_CONNECTIONS=20 DORIS_CONNECTION_TIMEOUT=30 DORIS_HEALTH_CHECK_INTERVAL=60 DORIS_MAX_CONNECTION_AGE=3600 -# ============================================================================= -# Profile And Explain Max Data Size -# ============================================================================= -MAX_RESPONSE_CONTENT_SIZE=4096 - -# ============================================================================= +# =================================================================== # Security Configuration -# ============================================================================= +# =================================================================== -ENABLE_SECURITY_CHECK=true -BLOCKED_KEYWORDS="DROP,TRUNCATE,DELETE,SHUTDOWN,INSERT,UPDATE,CREATE,ALTER,GRANT,REVOKE,KILL" +# Authentication configuration AUTH_TYPE=token TOKEN_SECRET=your_secret_key_here TOKEN_EXPIRY=3600 -MAX_RESULT_ROWS=10000 + +# SQL security check +ENABLE_SECURITY_CHECK=true + +# Blocked keywords (comma separated) +BLOCKED_KEYWORDS=DROP,CREATE,ALTER,TRUNCATE,DELETE,INSERT,UPDATE,GRANT,REVOKE,EXEC,EXECUTE,SHUTDOWN,KILL + +# Query limits MAX_QUERY_COMPLEXITY=100 +MAX_RESULT_ROWS=10000 + +# Data masking ENABLE_MASKING=true -# ============================================================================= +# =================================================================== # Performance Configuration -# ============================================================================= +# =================================================================== +# Query cache ENABLE_QUERY_CACHE=true CACHE_TTL=300 MAX_CACHE_SIZE=1000 + +# Concurrency control MAX_CONCURRENT_QUERIES=50 QUERY_TIMEOUT=300 -# ============================================================================= -# Logging Configuration -# ============================================================================= +# Response content size limit (characters) +MAX_RESPONSE_CONTENT_SIZE=4096 +# =================================================================== +# Logging Configuration +# =================================================================== + +# Basic logging configuration LOG_LEVEL=INFO LOG_FILE_PATH= + +# Audit logging ENABLE_AUDIT=true AUDIT_FILE_PATH= -# ============================================================================= -# Monitoring Configuration -# ============================================================================= +# Log file rotation configuration +LOG_MAX_FILE_SIZE=10485760 +LOG_BACKUP_COUNT=5 +# =================================================================== +# Log Cleanup Configuration - NEW! +# =================================================================== + +# Enable automatic log cleanup +ENABLE_LOG_CLEANUP=true + +# Maximum age of log files in days (files older than this will be deleted) +LOG_MAX_AGE_DAYS=30 + +# Cleanup check interval in hours +LOG_CLEANUP_INTERVAL_HOURS=24 + +# =================================================================== +# Monitoring Configuration +# =================================================================== + +# Metrics collection ENABLE_METRICS=true METRICS_PORT=3001 HEALTH_CHECK_PORT=3002 + +# Alert configuration ENABLE_ALERTS=false ALERT_WEBHOOK_URL= -# ============================================================================= +# =================================================================== # Server Configuration -# ============================================================================= +# =================================================================== +# Basic server information SERVER_NAME=doris-mcp-server -SERVER_VERSION=0.4.2 +SERVER_VERSION=0.4.3 SERVER_PORT=3000 + +# Temporary files directory +TEMP_FILES_DIR=tmp + +# =================================================================== +# Configuration Examples for Different Environments +# =================================================================== + +# Development Environment Example: +# LOG_LEVEL=DEBUG +# LOG_MAX_AGE_DAYS=7 +# LOG_CLEANUP_INTERVAL_HOURS=6 +# ENABLE_SECURITY_CHECK=false + +# Production Environment Example: +# LOG_LEVEL=INFO +# LOG_MAX_AGE_DAYS=30 +# LOG_CLEANUP_INTERVAL_HOURS=24 +# ENABLE_SECURITY_CHECK=true +# ENABLE_LOG_CLEANUP=true + +# Testing Environment Example: +# LOG_LEVEL=WARNING +# LOG_MAX_AGE_DAYS=3 +# LOG_CLEANUP_INTERVAL_HOURS=1 +# MAX_RESULT_ROWS=1000 + +# =================================================================== +# Advanced Configuration Notes +# =================================================================== + +# 1. Log Cleanup Feature: +# - ENABLE_LOG_CLEANUP: Controls whether to enable automatic cleanup +# - LOG_MAX_AGE_DAYS: File retention days, recommended 30 days for production, 7 days for development +# - LOG_CLEANUP_INTERVAL_HOURS: Check frequency, recommended 24 hours + +# 2. Security Best Practices: +# - Must change TOKEN_SECRET in production environment +# - Adjust BLOCKED_KEYWORDS according to business needs +# - Enable ENABLE_SECURITY_CHECK and ENABLE_MASKING + +# 3. Performance Tuning: +# - Adjust MAX_CONCURRENT_QUERIES based on hardware resources +# - Adjust QUERY_TIMEOUT based on query complexity +# - Adjust MAX_CACHE_SIZE based on memory size + +# 4. Connection Pool Optimization: +# - DORIS_MAX_CONNECTIONS recommended to be 2-4 times the number of CPU cores +# - DORIS_CONNECTION_TIMEOUT adjust based on network latency +# - DORIS_MAX_CONNECTION_AGE recommended 1 hour to avoid long connection issues \ No newline at end of file diff --git a/README.md b/README.md index 1dffe9f..9ae81f7 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Doris MCP (Model Context Protocol) Server is a backend service built with Python - **🎯 Centralized Configuration Management**: All security keywords now managed through single configuration source with consistent enforcement across all components - **🔧 MCP Version Compatibility**: Resolved MCP library version conflicts with intelligent compatibility layer supporting both MCP 1.8.x and 1.9.x versions - **🚀 Production Reliability**: Enhanced error handling, connection diagnostics, and automatic recovery from database connection issues -- **🙏 Community Contribution**: Special thanks to Hailin Xie for supporting the doris-mcp-server project by graciously transferring the PyPI project to the community free of charge, contributing to open source. The mcp-doris-server repository will be retained but no longer maintained, with ongoing development continuing on the doris-mcp-server repository +- **🙏 Community Contribution**: Special thanks to Hailin Xie for supporting the doris-mcp-server project by graciously transferring the PyPI project to the community free of charge, contributing to open source. The doris-mcp-server repository will be retained but no longer maintained, with ongoing development continuing on the doris-mcp-server repository > **🔧 Key Improvements**: Resolved connection stability issues, unified security keyword management, added comprehensive environment variable configuration for security policies, and fixed MCP library version compatibility conflicts. @@ -67,7 +67,7 @@ Doris MCP (Model Context Protocol) Server is a backend service built with Python pip install doris-mcp-server # Install specific version -pip install doris-mcp-server==0.4.2 +pip install doris-mcp-server==0.4.3 ``` > **💡 Command Compatibility**: After installation, both `doris-mcp-server` commands are available for backward compatibility. You can use either command interchangeably. @@ -189,6 +189,9 @@ cp .env.example .env * `LOG_LEVEL`: Log level (DEBUG/INFO/WARNING/ERROR, default: INFO) * `LOG_FILE_PATH`: Log file path * `ENABLE_AUDIT`: Enable audit logging (default: true) + * `ENABLE_LOG_CLEANUP`: Enable automatic log cleanup (default: true, New in v0.4.3) + * `LOG_MAX_AGE_DAYS`: Maximum age of log files in days (default: 30, New in v0.4.3) + * `LOG_CLEANUP_INTERVAL_HOURS`: Log cleanup check interval in hours (default: 24, New in v0.4.3) ### Available MCP Tools @@ -264,7 +267,7 @@ The Doris MCP Server supports **catalog federation**, enabling interaction with * **Multi-Catalog Metadata Access**: All metadata tools (`get_db_list`, `get_db_table_list`, `get_table_schema`, etc.) support an optional `catalog_name` parameter to query specific catalogs. * **Cross-Catalog SQL Queries**: Execute SQL queries that span multiple catalogs using three-part table naming. -* **Catalog Discovery**: Use `mcp_doris_get_catalog_list` to discover available catalogs and their types. +* **Catalog Discovery**: Use `get_catalog_list` to discover available catalogs and their types. #### Three-Part Naming Requirement: @@ -278,7 +281,7 @@ The Doris MCP Server supports **catalog federation**, enabling interaction with 1. **Get Available Catalogs:** ```json { - "tool_name": "mcp_doris_get_catalog_list", + "tool_name": "get_catalog_list", "arguments": {"random_string": "unique_id"} } ``` @@ -286,7 +289,7 @@ The Doris MCP Server supports **catalog federation**, enabling interaction with 2. **Get Databases in Specific Catalog:** ```json { - "tool_name": "mcp_doris_get_db_list", + "tool_name": "get_db_list", "arguments": {"random_string": "unique_id", "catalog_name": "mysql"} } ``` @@ -294,7 +297,7 @@ The Doris MCP Server supports **catalog federation**, enabling interaction with 3. **Query Internal Catalog:** ```json { - "tool_name": "mcp_doris_exec_query", + "tool_name": "exec_query", "arguments": { "random_string": "unique_id", "sql": "SELECT COUNT(*) FROM internal.ssb.customer" @@ -305,7 +308,7 @@ The Doris MCP Server supports **catalog federation**, enabling interaction with 4. **Query External Catalog:** ```json { - "tool_name": "mcp_doris_exec_query", + "tool_name": "exec_query", "arguments": { "random_string": "unique_id", "sql": "SELECT COUNT(*) FROM mysql.ssb.customer" @@ -316,7 +319,7 @@ The Doris MCP Server supports **catalog federation**, enabling interaction with 5. **Cross-Catalog Query:** ```json { - "tool_name": "mcp_doris_exec_query", + "tool_name": "exec_query", "arguments": { "random_string": "unique_id", "sql": "SELECT i.c_name, m.external_data FROM internal.ssb.customer i JOIN mysql.test.user_info m ON i.c_custkey = m.customer_id" @@ -589,7 +592,7 @@ Stdio mode allows Cursor to manage the server process directly. Configuration is Install the package from PyPI and configure Cursor to use it: ```bash -pip install mcp-doris-server +pip install doris-mcp-server ``` **Configure Cursor:** Add an entry like the following to your Cursor MCP configuration: @@ -989,16 +992,16 @@ Recommendations: DORIS_MAX_CONNECTIONS=20 ``` -### Q: How to resolve `at_eof` connection errors? (Fixed in v0.4.2) +### Q: How to resolve `at_eof` connection errors? (Fixed in v0.4.3) -**A:** Version 0.4.2 has resolved the critical `at_eof` connection errors. The improvements include: +**A:** Version 0.4.3 has resolved the critical `at_eof` connection errors. The improvements include: 1. **Enhanced Connection Health Monitoring**: Strict connection state validation before operations 2. **Automatic Retry Mechanism**: Failed queries are automatically retried up to 2 times 3. **Proactive Connection Cleanup**: Automatic detection and cleanup of problematic connections 4. **Connection Diagnostics**: Comprehensive connection health analysis and reporting -If you still encounter connection issues after upgrading to v0.4.2: +If you still encounter connection issues after upgrading to v0.4.3: ```bash # Check connection diagnostics # The system now automatically handles connection recovery @@ -1040,7 +1043,7 @@ pip uninstall mcp pip install mcp==1.8.0 # Or upgrade to latest compatible version -pip install --upgrade mcp-doris-server==0.4.2 +pip install --upgrade doris-mcp-server==0.4.2 ``` ### Q: How to view server logs? diff --git a/doris_mcp_server/main.py b/doris_mcp_server/main.py index eb02ca9..e5d42f8 100644 --- a/doris_mcp_server/main.py +++ b/doris_mcp_server/main.py @@ -214,8 +214,7 @@ from .utils.db import DorisConnectionManager from .utils.security import DorisSecurityManager import os -# Configure logging -logging.basicConfig(level=logging.INFO) +# Configure logging - will be properly initialized later logger = logging.getLogger(__name__) # Create a default config instance for getting default values @@ -240,7 +239,9 @@ class DorisServer: self.tools_manager = DorisToolsManager(self.connection_manager) self.prompts_manager = DorisPromptsManager(self.connection_manager) - self.logger = logging.getLogger(f"{__name__}.DorisServer") + # Import here to avoid circular imports + from .utils.logger import get_logger + self.logger = get_logger(f"{__name__}.DorisServer") self._setup_handlers() def _get_mcp_capabilities(self): @@ -679,9 +680,6 @@ async def main(): parser = create_arg_parser() args = parser.parse_args() - # Set log level - logging.getLogger().setLevel(getattr(logging, args.log_level)) - # Create configuration - priority: command line arguments > .env file > default values config = DorisConfig.from_env() # First load from .env file and environment variables @@ -699,6 +697,22 @@ async def main(): if args.log_level != _default_config.logging.level: config.logging.level = args.log_level + # Initialize enhanced logging system + from .utils.config import ConfigManager + config_manager = ConfigManager(config) + config_manager.setup_logging() + + # Get logger with proper configuration + from .utils.logger import get_logger, log_system_info + logger = get_logger(__name__) + + # Log system information for debugging + log_system_info() + + logger.info("Starting Doris MCP Server...") + logger.info(f"Transport: {args.transport}") + logger.info(f"Log Level: {config.logging.level}") + # Create server instance server = DorisServer(config) @@ -728,6 +742,10 @@ async def main(): await server.shutdown() except Exception as shutdown_error: logger.error(f"Error occurred while shutting down server: {shutdown_error}") + + # Shutdown logging system + from .utils.logger import shutdown_logging + shutdown_logging() return 0 diff --git a/doris_mcp_server/utils/config.py b/doris_mcp_server/utils/config.py index d48d118..438a3e0 100644 --- a/doris_mcp_server/utils/config.py +++ b/doris_mcp_server/utils/config.py @@ -32,6 +32,8 @@ try: except ImportError: load_dotenv = None +from .logger import get_logger + @dataclass class DatabaseConfig: @@ -144,6 +146,11 @@ class LoggingConfig: # Audit log configuration enable_audit: bool = True audit_file_path: str | None = None + + # Log cleanup configuration + enable_cleanup: bool = True + max_age_days: int = 30 + cleanup_interval_hours: int = 24 @dataclass @@ -327,6 +334,15 @@ class DorisConfig: os.getenv("ENABLE_AUDIT", str(config.logging.enable_audit).lower()).lower() == "true" ) config.logging.audit_file_path = os.getenv("AUDIT_FILE_PATH", config.logging.audit_file_path) + config.logging.enable_cleanup = ( + os.getenv("ENABLE_LOG_CLEANUP", str(config.logging.enable_cleanup).lower()).lower() == "true" + ) + config.logging.max_age_days = int( + os.getenv("LOG_MAX_AGE_DAYS", str(config.logging.max_age_days)) + ) + config.logging.cleanup_interval_hours = int( + os.getenv("LOG_CLEANUP_INTERVAL_HOURS", str(config.logging.cleanup_interval_hours)) + ) # Monitoring configuration config.monitoring.enable_metrics = ( @@ -454,6 +470,9 @@ class DorisConfig: "backup_count": self.logging.backup_count, "enable_audit": self.logging.enable_audit, "audit_file_path": self.logging.audit_file_path, + "enable_cleanup": self.logging.enable_cleanup, + "max_age_days": self.logging.max_age_days, + "cleanup_interval_hours": self.logging.cleanup_interval_hours, }, "monitoring": { "enable_metrics": self.monitoring.enable_metrics, @@ -531,6 +550,12 @@ class DorisConfig: if self.logging.backup_count < 0: errors.append("Log backup count cannot be negative") + + if self.logging.max_age_days <= 0: + errors.append("Log max age days must be greater than 0") + + if self.logging.cleanup_interval_hours <= 0: + errors.append("Log cleanup interval hours must be greater than 0") # Validate monitoring configuration if not (1 <= self.monitoring.metrics_port <= 65535): @@ -576,56 +601,41 @@ class ConfigManager: self.logger = logging.getLogger(__name__) def setup_logging(self): - """Setup logging configuration""" - # Configure root logger - root_logger = logging.getLogger() - root_logger.setLevel(getattr(logging, self.config.logging.level.upper())) - - # Clear existing handlers - for handler in root_logger.handlers[:]: - root_logger.removeHandler(handler) - - # Create formatter - formatter = logging.Formatter(self.config.logging.format) - - # Console handler - console_handler = logging.StreamHandler() - console_handler.setFormatter(formatter) - root_logger.addHandler(console_handler) - - # File handler (if configured) + """Setup logging configuration using enhanced logger""" + from .logger import setup_logging, get_logger + + # Determine log directory + log_dir = "logs" if self.config.logging.file_path: - try: - from logging.handlers import RotatingFileHandler - - file_handler = RotatingFileHandler( - self.config.logging.file_path, - maxBytes=self.config.logging.max_file_size, - backupCount=self.config.logging.backup_count, - encoding="utf-8", - ) - file_handler.setFormatter(formatter) - root_logger.addHandler(file_handler) - except Exception as e: - self.logger.warning(f"Failed to setup file logging: {e}") - - # Audit log handler (if configured) - if self.config.logging.enable_audit and self.config.logging.audit_file_path: - try: - from logging.handlers import RotatingFileHandler - - audit_logger = logging.getLogger("audit") - audit_handler = RotatingFileHandler( - self.config.logging.audit_file_path, - maxBytes=self.config.logging.max_file_size, - backupCount=self.config.logging.backup_count, - encoding="utf-8", - ) - audit_handler.setFormatter(formatter) - audit_logger.addHandler(audit_handler) - audit_logger.setLevel(logging.INFO) - except Exception as e: - self.logger.warning(f"Failed to setup audit logging: {e}") + # Extract directory from file path if provided + from pathlib import Path + log_dir = str(Path(self.config.logging.file_path).parent) + + # Setup enhanced logging with cleanup functionality + setup_logging( + level=self.config.logging.level, + log_dir=log_dir, + enable_console=True, + enable_file=True, + enable_audit=self.config.logging.enable_audit, + audit_file=self.config.logging.audit_file_path, + max_file_size=self.config.logging.max_file_size, + backup_count=self.config.logging.backup_count, + enable_cleanup=self.config.logging.enable_cleanup, + max_age_days=self.config.logging.max_age_days, + cleanup_interval_hours=self.config.logging.cleanup_interval_hours + ) + + # Update logger to use new system + self.logger = get_logger(__name__) + + self.logger.info("Enhanced logging system with cleanup initialized successfully") + self.logger.info(f"Log directory: {log_dir}") + self.logger.info(f"Log level: {self.config.logging.level}") + self.logger.info(f"Audit logging: {'Enabled' if self.config.logging.enable_audit else 'Disabled'}") + self.logger.info(f"Log cleanup: {'Enabled' if self.config.logging.enable_cleanup else 'Disabled'}") + if self.config.logging.enable_cleanup: + self.logger.info(f"Cleanup config: Max age {self.config.logging.max_age_days} days, interval {self.config.logging.cleanup_interval_hours}h") def validate_config(self) -> bool: """Validate configuration""" diff --git a/doris_mcp_server/utils/db.py b/doris_mcp_server/utils/db.py index 08ed7db..20d3d49 100644 --- a/doris_mcp_server/utils/db.py +++ b/doris_mcp_server/utils/db.py @@ -34,6 +34,8 @@ import random import aiomysql from aiomysql import Connection, Pool +from .logger import get_logger + @@ -71,7 +73,7 @@ class DorisConnection: self.query_count = 0 self.is_healthy = True self.security_manager = security_manager - self.logger = logging.getLogger(__name__) + self.logger = get_logger(__name__) async def execute(self, sql: str, params: tuple | None = None, auth_context=None) -> QueryResult: """Execute SQL query""" @@ -200,7 +202,7 @@ class DorisConnectionManager: self.config = config self.security_manager = security_manager self.pool: Pool | None = None - self.logger = logging.getLogger(__name__) + self.logger = get_logger(__name__) self.metrics = ConnectionMetrics() # Remove session-level connection management @@ -712,7 +714,7 @@ class ConnectionPoolMonitor: def __init__(self, connection_manager: DorisConnectionManager): self.connection_manager = connection_manager - self.logger = logging.getLogger(__name__) + self.logger = get_logger(__name__) async def get_pool_status(self) -> dict[str, Any]: """Get connection pool status""" diff --git a/doris_mcp_server/utils/logger.py b/doris_mcp_server/utils/logger.py index ee80702..23319cb 100644 --- a/doris_mcp_server/utils/logger.py +++ b/doris_mcp_server/utils/logger.py @@ -15,87 +15,615 @@ # specific language governing permissions and limitations # under the License. """ -Logging configuration for Doris MCP Server. +Enhanced Logging configuration for Doris MCP Server. +Features: +- Log level-based file separation +- Timestamped log entries +- Automatic log rotation +- Comprehensive logging coverage """ import logging import logging.config +import logging.handlers import sys +import os +import asyncio +import time from pathlib import Path -from typing import Any +from typing import Any, Optional +from datetime import datetime, timedelta +import threading -def setup_logging( - level: str = "INFO", - log_file: str | None = None, - log_format: str | None = None, -) -> None: - """ - Setup logging configuration. +class TimestampedFormatter(logging.Formatter): + """Custom formatter with enhanced timestamp and structured format""" + + def __init__(self, fmt=None, datefmt=None, style='%'): + if fmt is None: + fmt = "%(asctime)s.%(msecs)03d %(level_aligned)s %(name)s:%(lineno)d - %(message)s" + if datefmt is None: + datefmt = "%Y-%m-%d %H:%M:%S" + super().__init__(fmt, datefmt, style) + + def format(self, record): + """Format log record with enhanced information and proper alignment""" + # Add process info if available + if hasattr(record, 'process') and record.process: + record.process_info = f"[PID:{record.process}]" + else: + record.process_info = "" + + # Add thread info if available + if hasattr(record, 'thread') and record.thread: + record.thread_info = f"[TID:{record.thread}]" + else: + record.thread_info = "" + + # Format with proper alignment after the level name + # Calculate padding needed for alignment + level_name = record.levelname + max_level_length = 8 # Length of "CRITICAL" + padding = max_level_length - len(level_name) + record.level_aligned = f"[{level_name}]{' ' * padding}" + + return super().format(record) - Args: - level: Logging level (DEBUG, INFO, WARNING, ERROR) - log_file: Optional log file path - log_format: Optional custom log format - """ - if log_format is None: - log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - # Base configuration - config: dict[str, Any] = { - "version": 1, - "disable_existing_loggers": False, - "formatters": { - "default": {"format": log_format, "datefmt": "%Y-%m-%d %H:%M:%S"} - }, - "handlers": { - "console": { - "class": "logging.StreamHandler", - "level": level, - "formatter": "default", - "stream": sys.stdout, - } - }, - "root": {"level": level, "handlers": ["console"]}, - "loggers": { - "doris_mcp_server": { - "level": level, - "handlers": ["console"], - "propagate": False, - } - }, - } - - # Add file handler if log_file is specified - if log_file: +class LevelBasedFileHandler(logging.Handler): + """Custom handler that writes different log levels to different files""" + + def __init__(self, log_dir: str, base_name: str = "doris_mcp_server", + max_bytes: int = 10*1024*1024, backup_count: int = 5): + super().__init__() + self.log_dir = Path(log_dir) + self.base_name = base_name + self.max_bytes = max_bytes + self.backup_count = backup_count + # Ensure log directory exists - log_path = Path(log_file) - log_path.parent.mkdir(parents=True, exist_ok=True) - - config["handlers"]["file"] = { - "class": "logging.handlers.RotatingFileHandler", - "level": level, - "formatter": "default", - "filename": log_file, - "maxBytes": 10485760, # 10MB - "backupCount": 5, + self.log_dir.mkdir(parents=True, exist_ok=True) + + # Create handlers for different log levels + self.handlers = {} + self._setup_level_handlers() + + def _setup_level_handlers(self): + """Setup rotating file handlers for different log levels""" + level_files = { + 'DEBUG': 'debug.log', + 'INFO': 'info.log', + 'WARNING': 'warning.log', + 'ERROR': 'error.log', + 'CRITICAL': 'critical.log' } + + formatter = TimestampedFormatter() + + for level, filename in level_files.items(): + file_path = self.log_dir / f"{self.base_name}_{filename}" + handler = logging.handlers.RotatingFileHandler( + file_path, + maxBytes=self.max_bytes, + backupCount=self.backup_count, + encoding='utf-8' + ) + handler.setFormatter(formatter) + handler.setLevel(getattr(logging, level)) + self.handlers[level] = handler + + def emit(self, record): + """Emit log record to appropriate level-based file""" + level_name = record.levelname + if level_name in self.handlers: + try: + self.handlers[level_name].emit(record) + except Exception: + self.handleError(record) + + def close(self): + """Close all handlers""" + for handler in self.handlers.values(): + handler.close() + super().close() - # Add file handler to root and package loggers - config["root"]["handlers"].append("file") - config["loggers"]["doris_mcp_server"]["handlers"].append("file") - logging.config.dictConfig(config) +class LogCleanupManager: + """Log file cleanup manager for automatic maintenance""" + + def __init__(self, log_dir: str, max_age_days: int = 30, cleanup_interval_hours: int = 24): + """ + Initialize log cleanup manager. + + Args: + log_dir: Directory containing log files + max_age_days: Maximum age of log files in days (default: 30 days) + cleanup_interval_hours: Cleanup interval in hours (default: 24 hours) + """ + self.log_dir = Path(log_dir) + self.max_age_days = max_age_days + self.cleanup_interval_hours = cleanup_interval_hours + self.cleanup_thread = None + self.stop_event = threading.Event() + self.logger = None + + def start_cleanup_scheduler(self): + """Start the cleanup scheduler in a background thread""" + if self.cleanup_thread and self.cleanup_thread.is_alive(): + return + + self.stop_event.clear() + self.cleanup_thread = threading.Thread(target=self._cleanup_loop, daemon=True) + self.cleanup_thread.start() + + # Get logger for this class + if not self.logger: + self.logger = logging.getLogger("doris_mcp_server.log_cleanup") + + self.logger.info(f"Log cleanup scheduler started - cleanup every {self.cleanup_interval_hours}h, max age {self.max_age_days} days") + + def stop_cleanup_scheduler(self): + """Stop the cleanup scheduler""" + if self.cleanup_thread and self.cleanup_thread.is_alive(): + self.stop_event.set() + self.cleanup_thread.join(timeout=5) + if self.logger: + self.logger.info("Log cleanup scheduler stopped") + + def _cleanup_loop(self): + """Background loop for periodic cleanup""" + while not self.stop_event.is_set(): + try: + self.cleanup_old_logs() + # Sleep for the specified interval, but check stop event every 60 seconds + for _ in range(self.cleanup_interval_hours * 60): # Convert hours to minutes + if self.stop_event.wait(60): # Wait 60 seconds or until stop event + break + except Exception as e: + if self.logger: + self.logger.error(f"Error in log cleanup loop: {e}") + # Sleep for 5 minutes before retrying + self.stop_event.wait(300) + + def cleanup_old_logs(self): + """Clean up old log files based on age""" + if not self.log_dir.exists(): + return + + current_time = datetime.now() + cutoff_time = current_time - timedelta(days=self.max_age_days) + + cleaned_files = [] + cleaned_size = 0 + + # Pattern for log files (including backup files) + log_patterns = [ + "doris_mcp_server_*.log", + "doris_mcp_server_*.log.*" # Backup files + ] + + for pattern in log_patterns: + for log_file in self.log_dir.glob(pattern): + try: + # Get file modification time + file_mtime = datetime.fromtimestamp(log_file.stat().st_mtime) + + if file_mtime < cutoff_time: + file_size = log_file.stat().st_size + log_file.unlink() # Delete the file + cleaned_files.append(log_file.name) + cleaned_size += file_size + + except Exception as e: + if self.logger: + self.logger.warning(f"Failed to cleanup log file {log_file}: {e}") + + if cleaned_files and self.logger: + size_mb = cleaned_size / (1024 * 1024) + self.logger.info(f"Cleaned up {len(cleaned_files)} old log files, freed {size_mb:.2f} MB") + self.logger.debug(f"Cleaned files: {', '.join(cleaned_files)}") + + def get_cleanup_stats(self) -> dict: + """Get statistics about log files and cleanup status""" + if not self.log_dir.exists(): + return {"error": "Log directory does not exist"} + + stats = { + "log_directory": str(self.log_dir.absolute()), + "max_age_days": self.max_age_days, + "cleanup_interval_hours": self.cleanup_interval_hours, + "scheduler_running": self.cleanup_thread and self.cleanup_thread.is_alive(), + "total_files": 0, + "total_size_mb": 0, + "files_by_age": {"recent": 0, "old": 0}, + "oldest_file": None, + "newest_file": None + } + + current_time = datetime.now() + cutoff_time = current_time - timedelta(days=self.max_age_days) + oldest_time = None + newest_time = None + + log_patterns = ["doris_mcp_server_*.log", "doris_mcp_server_*.log.*"] + + for pattern in log_patterns: + for log_file in self.log_dir.glob(pattern): + try: + file_stat = log_file.stat() + file_mtime = datetime.fromtimestamp(file_stat.st_mtime) + + stats["total_files"] += 1 + stats["total_size_mb"] += file_stat.st_size / (1024 * 1024) + + if file_mtime < cutoff_time: + stats["files_by_age"]["old"] += 1 + else: + stats["files_by_age"]["recent"] += 1 + + if oldest_time is None or file_mtime < oldest_time: + oldest_time = file_mtime + stats["oldest_file"] = {"name": log_file.name, "age_days": (current_time - file_mtime).days} + + if newest_time is None or file_mtime > newest_time: + newest_time = file_mtime + stats["newest_file"] = {"name": log_file.name, "age_days": (current_time - file_mtime).days} + + except Exception: + continue + + stats["total_size_mb"] = round(stats["total_size_mb"], 2) + + return stats + + +class DorisLoggerManager: + """Centralized logger manager for Doris MCP Server""" + + def __init__(self): + self.is_initialized = False + self.log_dir = None + self.config = None + self.loggers = {} + self.cleanup_manager = None + + def setup_logging(self, + level: str = "INFO", + log_dir: str = "logs", + enable_console: bool = True, + enable_file: bool = True, + enable_audit: bool = True, + audit_file: Optional[str] = None, + max_file_size: int = 10*1024*1024, + backup_count: int = 5, + enable_cleanup: bool = True, + max_age_days: int = 30, + cleanup_interval_hours: int = 24) -> None: + """ + Setup comprehensive logging configuration. + + Args: + level: Base logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) + log_dir: Directory for log files + enable_console: Enable console output + enable_file: Enable file logging + enable_audit: Enable audit logging + audit_file: Custom audit log file path + max_file_size: Maximum size per log file (bytes) + backup_count: Number of backup files to keep + enable_cleanup: Enable automatic log cleanup + max_age_days: Maximum age of log files in days (default: 30) + cleanup_interval_hours: Cleanup interval in hours (default: 24) + """ + if self.is_initialized: + return + + self.log_dir = Path(log_dir) + self.log_dir.mkdir(parents=True, exist_ok=True) + + # Clear existing handlers + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + + # Set root logger level + root_logger.setLevel(logging.DEBUG) # Allow all levels, handlers will filter + + handlers = [] + + # Console handler + if enable_console: + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(getattr(logging, level.upper())) + console_formatter = TimestampedFormatter( + fmt="%(asctime)s.%(msecs)03d %(level_aligned)s %(name)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + console_handler.setFormatter(console_formatter) + handlers.append(console_handler) + + # Level-based file handlers + if enable_file: + level_handler = LevelBasedFileHandler( + log_dir=str(self.log_dir), + base_name="doris_mcp_server", + max_bytes=max_file_size, + backup_count=backup_count + ) + level_handler.setLevel(logging.DEBUG) # Accept all levels + handlers.append(level_handler) + + # Combined application log (all levels in one file) + if enable_file: + app_log_file = self.log_dir / "doris_mcp_server_all.log" + app_handler = logging.handlers.RotatingFileHandler( + app_log_file, + maxBytes=max_file_size, + backupCount=backup_count, + encoding='utf-8' + ) + app_handler.setLevel(getattr(logging, level.upper())) + app_formatter = TimestampedFormatter() + app_handler.setFormatter(app_formatter) + handlers.append(app_handler) + + # Audit logger (separate from main logging) + if enable_audit: + audit_file_path = audit_file or str(self.log_dir / "doris_mcp_server_audit.log") + audit_logger = logging.getLogger("audit") + audit_logger.setLevel(logging.INFO) + + # Clear existing audit handlers + for handler in audit_logger.handlers[:]: + audit_logger.removeHandler(handler) + + audit_handler = logging.handlers.RotatingFileHandler( + audit_file_path, + maxBytes=max_file_size, + backupCount=backup_count, + encoding='utf-8' + ) + audit_formatter = TimestampedFormatter( + fmt="%(asctime)s.%(msecs)03d [AUDIT] %(name)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + ) + audit_handler.setFormatter(audit_formatter) + audit_logger.addHandler(audit_handler) + audit_logger.propagate = False # Don't propagate to root logger + + # Add all handlers to root logger + for handler in handlers: + root_logger.addHandler(handler) + + # Setup package-specific loggers + self._setup_package_loggers(level) + + # Setup log cleanup manager + if enable_cleanup and enable_file: + self.cleanup_manager = LogCleanupManager( + log_dir=str(self.log_dir), + max_age_days=max_age_days, + cleanup_interval_hours=cleanup_interval_hours + ) + self.cleanup_manager.start_cleanup_scheduler() + + self.is_initialized = True + + # Log initialization message + logger = self.get_logger("doris_mcp_server.logger") + logger.info("=" * 80) + logger.info("Doris MCP Server Logging System Initialized") + logger.info(f"Log Level: {level}") + logger.info(f"Log Directory: {self.log_dir.absolute()}") + logger.info(f"Console Logging: {'Enabled' if enable_console else 'Disabled'}") + logger.info(f"File Logging: {'Enabled' if enable_file else 'Disabled'}") + logger.info(f"Audit Logging: {'Enabled' if enable_audit else 'Disabled'}") + logger.info(f"Log Cleanup: {'Enabled' if enable_cleanup and enable_file else 'Disabled'}") + if enable_cleanup and enable_file: + logger.info(f"Cleanup Settings: Max age {max_age_days} days, interval {cleanup_interval_hours}h") + logger.info("=" * 80) + + def _setup_package_loggers(self, level: str): + """Setup specific loggers for different modules""" + package_loggers = [ + "doris_mcp_server", + "doris_mcp_server.main", + "doris_mcp_server.utils", + "doris_mcp_server.tools", + "doris_mcp_client" + ] + + for logger_name in package_loggers: + logger = logging.getLogger(logger_name) + logger.setLevel(getattr(logging, level.upper())) + # Don't add handlers here - they inherit from root logger + + def get_logger(self, name: str) -> logging.Logger: + """ + Get a logger instance with proper configuration. + + Args: + name: Logger name (usually __name__) + + Returns: + Configured logger instance + """ + if name not in self.loggers: + logger = logging.getLogger(name) + self.loggers[name] = logger + + return self.loggers[name] + + def get_audit_logger(self) -> logging.Logger: + """Get the audit logger""" + return logging.getLogger("audit") + + def log_system_info(self): + """Log system information for debugging""" + logger = self.get_logger("doris_mcp_server.system") + logger.info("System Information:") + logger.info(f"Python Version: {sys.version}") + logger.info(f"Platform: {sys.platform}") + logger.info(f"Working Directory: {os.getcwd()}") + logger.info(f"Process ID: {os.getpid()}") + + # Log environment variables (filtered) + env_vars = ["LOG_LEVEL", "LOG_FILE_PATH", "ENABLE_AUDIT", "AUDIT_FILE_PATH"] + for var in env_vars: + value = os.getenv(var, "Not Set") + logger.info(f"Environment {var}: {value}") + + def get_cleanup_stats(self) -> dict: + """Get log cleanup statistics""" + if self.cleanup_manager: + return self.cleanup_manager.get_cleanup_stats() + else: + return {"error": "Log cleanup is not enabled"} + + def manual_cleanup(self) -> dict: + """Manually trigger log cleanup and return statistics""" + if self.cleanup_manager: + self.cleanup_manager.cleanup_old_logs() + return self.cleanup_manager.get_cleanup_stats() + else: + return {"error": "Log cleanup is not enabled"} + + def shutdown(self): + """Shutdown logging system""" + if not self.is_initialized: + return + + logger = self.get_logger("doris_mcp_server.logger") + logger.info("Shutting down logging system...") + + # Stop cleanup manager + if self.cleanup_manager: + self.cleanup_manager.stop_cleanup_scheduler() + + # Close all handlers + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + try: + handler.close() + except Exception as e: + print(f"Error closing handler: {e}") + + # Close audit logger handlers + audit_logger = logging.getLogger("audit") + for handler in audit_logger.handlers[:]: + try: + handler.close() + except Exception as e: + print(f"Error closing audit handler: {e}") + + self.is_initialized = False + + +# Global logger manager instance +_logger_manager = DorisLoggerManager() + + +def setup_logging(level: str = "INFO", + log_dir: str = "logs", + enable_console: bool = True, + enable_file: bool = True, + enable_audit: bool = True, + audit_file: Optional[str] = None, + max_file_size: int = 10*1024*1024, + backup_count: int = 5, + enable_cleanup: bool = True, + max_age_days: int = 30, + cleanup_interval_hours: int = 24) -> None: + """ + Setup logging configuration (convenience function). + + Args: + level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) + log_dir: Directory for log files + enable_console: Enable console output + enable_file: Enable file logging + enable_audit: Enable audit logging + audit_file: Custom audit log file path + max_file_size: Maximum size per log file (bytes) + backup_count: Number of backup files to keep + enable_cleanup: Enable automatic log cleanup + max_age_days: Maximum age of log files in days (default: 30) + cleanup_interval_hours: Cleanup interval in hours (default: 24) + """ + _logger_manager.setup_logging( + level=level, + log_dir=log_dir, + enable_console=enable_console, + enable_file=enable_file, + enable_audit=enable_audit, + audit_file=audit_file, + max_file_size=max_file_size, + backup_count=backup_count, + enable_cleanup=enable_cleanup, + max_age_days=max_age_days, + cleanup_interval_hours=cleanup_interval_hours + ) def get_logger(name: str) -> logging.Logger: """ Get a logger instance. - + Args: - name: Logger name - + name: Logger name (usually __name__) + Returns: - Logger instance + Configured logger instance """ - return logging.getLogger(name) + return _logger_manager.get_logger(name) + + +def get_audit_logger() -> logging.Logger: + """Get the audit logger""" + return _logger_manager.get_audit_logger() + + +def log_system_info(): + """Log system information for debugging""" + _logger_manager.log_system_info() + + +def get_cleanup_stats() -> dict: + """Get log cleanup statistics""" + return _logger_manager.get_cleanup_stats() + + +def manual_cleanup() -> dict: + """Manually trigger log cleanup and return statistics""" + return _logger_manager.manual_cleanup() + + +def shutdown_logging(): + """Shutdown logging system""" + _logger_manager.shutdown() + + +# Compatibility function for existing code +def setup_logging_old(level: str = "INFO", + log_file: str | None = None, + log_format: str | None = None) -> None: + """ + Legacy setup function for backward compatibility. + + Args: + level: Logging level (DEBUG, INFO, WARNING, ERROR) + log_file: Optional log file path (deprecated - use log_dir instead) + log_format: Optional custom log format (deprecated) + """ + # Extract directory from log_file if provided + log_dir = "logs" + if log_file: + log_dir = str(Path(log_file).parent) + + setup_logging( + level=level, + log_dir=log_dir, + enable_console=True, + enable_file=True, + enable_audit=True + ) diff --git a/doris_mcp_server/utils/query_executor.py b/doris_mcp_server/utils/query_executor.py index 367dd9c..46f8794 100644 --- a/doris_mcp_server/utils/query_executor.py +++ b/doris_mcp_server/utils/query_executor.py @@ -34,6 +34,7 @@ from typing import Any, Dict from decimal import Decimal from .db import DorisConnectionManager, QueryResult +from .logger import get_logger @dataclass @@ -92,7 +93,7 @@ class QueryCache: self.max_size = max_size self.default_ttl = default_ttl self.cache: dict[str, CachedQuery] = {} - self.logger = logging.getLogger(__name__) + self.logger = get_logger(__name__) def _generate_cache_key( self, sql: str, parameters: dict[str, Any] | None = None @@ -194,7 +195,7 @@ class QueryOptimizer: def __init__(self, config): self.config = config - self.logger = logging.getLogger(__name__) + self.logger = get_logger(__name__) self.optimization_rules = self._load_optimization_rules() def _load_optimization_rules(self) -> list[dict[str, Any]]: @@ -318,7 +319,7 @@ class DorisQueryExecutor: def __init__(self, connection_manager: DorisConnectionManager, config=None): self.connection_manager = connection_manager self.config = config or self._create_default_config() - self.logger = logging.getLogger(__name__) + self.logger = get_logger(__name__) # Initialize components cache_config = getattr(self.config, 'performance', None) @@ -746,7 +747,7 @@ class QueryPerformanceMonitor: def __init__(self, query_executor: DorisQueryExecutor): self.query_executor = query_executor - self.logger = logging.getLogger(__name__) + self.logger = get_logger(__name__) self.performance_records = [] async def record_query_performance( diff --git a/doris_mcp_server/utils/schema_extractor.py b/doris_mcp_server/utils/schema_extractor.py index db973e7..9496142 100644 --- a/doris_mcp_server/utils/schema_extractor.py +++ b/doris_mcp_server/utils/schema_extractor.py @@ -31,7 +31,7 @@ from dotenv import load_dotenv from datetime import datetime, timedelta # Import unified logging configuration -from doris_mcp_server.utils.logger import get_logger +from .logger import get_logger # Configure logging logger = get_logger(__name__) diff --git a/doris_mcp_server/utils/security.py b/doris_mcp_server/utils/security.py index 24952b3..abf6a7f 100644 --- a/doris_mcp_server/utils/security.py +++ b/doris_mcp_server/utils/security.py @@ -31,6 +31,8 @@ import sqlparse from sqlparse.sql import Statement from sqlparse.tokens import Keyword, Name +from .logger import get_logger + class SecurityLevel(Enum): """Security level enumeration""" @@ -86,7 +88,7 @@ class DorisSecurityManager: def __init__(self, config): self.config = config - self.logger = logging.getLogger(__name__) + self.logger = get_logger(__name__) # Initialize security components self.auth_provider = AuthenticationProvider(config) @@ -211,7 +213,7 @@ class AuthenticationProvider: def __init__(self, config): self.config = config - self.logger = logging.getLogger(__name__) + self.logger = get_logger(__name__) self.session_cache = {} async def authenticate(self, auth_info: dict[str, Any]) -> AuthContext: @@ -321,7 +323,7 @@ class AuthorizationProvider: def __init__(self, config): self.config = config - self.logger = logging.getLogger(__name__) + self.logger = get_logger(__name__) self.permission_cache = {} # Load sensitive tables configuration @@ -464,7 +466,7 @@ class SQLSecurityValidator: def __init__(self, config): self.config = config - self.logger = logging.getLogger(__name__) + self.logger = get_logger(__name__) # Handle DorisConfig object or dictionary configuration if hasattr(config, 'get'): @@ -686,7 +688,7 @@ class DataMaskingProcessor: def __init__(self, config): self.config = config - self.logger = logging.getLogger(__name__) + self.logger = get_logger(__name__) self.masking_algorithms = self._init_masking_algorithms() self.masking_rules = self._load_masking_rules()