init doris mcp 0.2.0

2025-05-06 12:56:55 +08:00
parent 9dc25be87a
commit c190f19cb5
23 changed files with 6405 additions and 0 deletions
--- a/doris_mcp_server/tools/init.py
+++ b/doris_mcp_server/tools/init.py
@@ -0,0 +1,23 @@
+from .mcp_doris_tools import (
+    mcp_doris_exec_query,
+    mcp_doris_get_table_schema,
+    mcp_doris_get_db_table_list,
+    mcp_doris_get_db_list,
+    mcp_doris_get_table_comment,
+    mcp_doris_get_table_column_comments,
+    mcp_doris_get_table_indexes,
+    mcp_doris_get_recent_audit_logs
+)
+
+# __all__ may only list names bound in this module (the prefixed functions);
+# unprefixed tool names are not defined here and would break a star-import.
+__all__ = [
+    "mcp_doris_exec_query",
+    "mcp_doris_get_table_schema",
+    "mcp_doris_get_db_table_list",
+    "mcp_doris_get_db_list",
+    "mcp_doris_get_table_comment",
+    "mcp_doris_get_table_column_comments",
+    "mcp_doris_get_table_indexes",
+    "mcp_doris_get_recent_audit_logs",
+]
--- a/doris_mcp_server/tools/mcp_doris_tools.py
+++ b/doris_mcp_server/tools/mcp_doris_tools.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Doris MCP Tool Implementations
+
+Includes exec_query and new tools based on schema_extractor.
+"""
+
+import os
+import time
+import json
+import logging
+from typing import Dict, Any
+import pandas as pd
+
+# --- Use absolute imports ---
+from doris_mcp_server.utils.schema_extractor import MetadataExtractor
+from doris_mcp_server.utils.sql_executor_tools import execute_sql_query
+
+# Get logger
+logger = logging.getLogger("doris-mcp-tools")
+
+# --- Helper Function to format response ---
+def _format_response(success: bool, result: Any = None, error: str = None, message: str = "") -> Dict[str, Any]:
+    """Build the MCP text-content envelope for a tool outcome.
+
+    The envelope text is a JSON object holding ``success`` and ``timestamp``
+    plus, depending on the outcome, ``result``/``message`` or
+    ``error``/``message``.
+
+    Args:
+        success: Whether the tool call succeeded.
+        result: Value reported on success. A pandas DataFrame is converted to
+            a list of row records; any other value is stored as-is. Ignored
+            when ``success`` is False.
+        error: Error text for a failed call; "Unknown error" when empty.
+        message: Human-readable summary; a generic one is used when empty.
+
+    Returns:
+        ``{"content": [{"type": "text", "text": <JSON string>}]}``.
+    """
+    response_data = {
+        "success": success,
+        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
+    }
+    if success and result is not None:
+        serialized_ok = True
+        if isinstance(result, pd.DataFrame):
+            try:
+                # Let pandas render the frame (dates as ISO strings), then
+                # parse it back so the rows are embedded as JSON, not a string.
+                response_data["result"] = json.loads(result.to_json(orient='records', date_format='iso'))
+            except Exception as df_err:
+                logger.error(f"DataFrame to JSON conversion failed: {df_err}")
+                serialized_ok = False
+                response_data["result"] = {"error": "Failed to serialize DataFrame result"}
+                response_data["success"] = False
+                response_data["error"] = f"DataFrame serialization error: {str(df_err)}"
+        else:
+            response_data["result"] = result
+        if serialized_ok:
+            response_data["message"] = message or "Operation successful"
+        else:
+            # The call is reported as failed above, so it must not also carry
+            # the caller's (or the default) success message.
+            response_data["message"] = "Operation failed"
+    elif not success:
+        response_data["error"] = error or "Unknown error"
+        response_data["message"] = message or "Operation failed"
+
+    return {
+        "content": [
+            {
+                "type": "text",
+                # default=str keeps non-JSON-native values from raising.
+                "text": json.dumps(response_data, ensure_ascii=False, default=str)
+            }
+        ]
+    }
+
+async def mcp_doris_exec_query(sql: str = None, db_name: str = None, max_rows: int = 100, timeout: int = 30) -> Dict[str, Any]:
+    """
+    Run a SQL statement through the SQL executor and relay its JSON payload.
+
+    Args:
+        sql (str): Statement to run; an empty value produces an error response.
+        db_name (str, optional): Target database name, forwarded to the executor as-is.
+        max_rows (int, optional): Row cap forwarded to the executor. Defaults to 100.
+        timeout (int, optional): Timeout in seconds forwarded to the executor. Defaults to 30.
+
+    Returns:
+        Dict[str, Any]: An MCP text-content envelope. On the normal path the text is
+        the executor's own JSON payload, re-serialized and NOT wrapped by
+        ``_format_response``; every error path goes through ``_format_response``.
+    """
+    logger.info(f"MCP Tool Call: mcp_doris_exec_query, SQL: {sql}, DB: {db_name}, MaxRows: {max_rows}, Timeout: {timeout}")
+    try:
+        if not sql:
+            return _format_response(success=False, error="SQL statement not provided", message="Please provide the SQL statement to execute")
+
+        # The executor takes a single context dict with the arguments under "params".
+        query_params = {"sql": sql, "db_name": db_name, "max_rows": max_rows, "timeout": timeout}
+        raw = await execute_sql_query({"params": query_params})
+
+        # Expected shape: {'content': [{'type': 'text', 'text': <json string>}]}
+        first_has_text = raw and 'content' in raw and len(raw['content']) > 0 and 'text' in raw['content'][0]
+        if not first_has_text:
+            logger.error(f"execute_sql_query returned an unexpected format: {raw}")
+            return _format_response(success=False, error="SQL executor returned invalid format", message="Internal error executing SQL query")
+
+        try:
+            # Decode and re-encode the executor payload so it is emitted with
+            # this module's JSON settings; its structure is passed through untouched.
+            payload = json.loads(raw['content'][0]['text'])
+            payload_text = json.dumps(payload, ensure_ascii=False, default=str)
+            return {"content": [{"type": "text", "text": payload_text}]}
+        except json.JSONDecodeError as json_err:
+            logger.error(f"Failed to parse execute_sql_query result: {json_err}")
+            return _format_response(success=False, error=str(json_err), message="Error parsing SQL execution result")
+        except Exception as parse_err:
+            logger.error(f"Unexpected error occurred while processing execute_sql_query result: {parse_err}", exc_info=True)
+            return _format_response(success=False, error=str(parse_err), message="Unknown error occurred while processing SQL execution result")
+
+    except Exception as exc:
+        logger.error(f"MCP tool execution failed mcp_doris_exec_query: {str(exc)}", exc_info=True)
+        return _format_response(success=False, error=str(exc), message="Error executing SQL query")
+
+
+async def mcp_doris_get_table_schema(table_name: str, db_name: str = None) -> Dict[str, Any]:
+    """Return the schema of ``table_name`` as a formatted MCP response.
+
+    Args:
+        table_name: Table to inspect; an empty value yields an error response.
+        db_name: Database holding the table; forwarded to MetadataExtractor.
+
+    Returns:
+        The ``_format_response`` envelope: the schema on success, otherwise an
+        error (missing parameter, empty schema, or an exception from the lookup).
+    """
+    logger.info(f"MCP Tool Call: mcp_doris_get_table_schema, Table: {table_name}, DB: {db_name}")
+    if not table_name:
+        return _format_response(success=False, error="Missing table_name parameter")
+    try:
+        meta = MetadataExtractor(db_name=db_name)
+        columns = meta.get_table_schema(table_name=table_name, db_name=db_name)
+        if columns:
+            return _format_response(success=True, result=columns)
+        # An empty schema is reported as a failure; the message names the
+        # database that was actually consulted (argument or extractor default).
+        qualified_name = f"{db_name or meta.db_name}.{table_name}"
+        return _format_response(success=False, error="Table not found or has no columns", message=f"Could not get schema for table {qualified_name}")
+    except Exception as exc:
+        logger.error(f"MCP tool execution failed mcp_doris_get_table_schema: {str(exc)}", exc_info=True)
+        return _format_response(success=False, error=str(exc), message="Error getting table schema")
+
+async def mcp_doris_get_db_table_list(db_name: str = None) -> Dict[str, Any]:
+    """List the tables of a database as a formatted MCP response.
+
+    Args:
+        db_name: Database to list; forwarded unchanged to MetadataExtractor.
+
+    Returns:
+        The ``_format_response`` envelope with the table list, or an error
+        envelope if the lookup raises.
+    """
+    logger.info(f"MCP Tool Call: mcp_doris_get_db_table_list, DB: {db_name}")
+    try:
+        table_list = MetadataExtractor(db_name=db_name).get_database_tables(db_name=db_name)
+        return _format_response(success=True, result=table_list)
+    except Exception as exc:
+        logger.error(f"MCP tool execution failed mcp_doris_get_db_table_list: {str(exc)}", exc_info=True)
+        return _format_response(success=False, error=str(exc), message="Error getting database table list")
+
+async def mcp_doris_get_db_list() -> Dict[str, Any]:
+    """List all databases reported by MetadataExtractor as an MCP response.
+
+    Returns:
+        The ``_format_response`` envelope with the database list, or an error
+        envelope if the lookup raises.
+    """
+    logger.info("MCP Tool Call: mcp_doris_get_db_list")
+    try:
+        database_list = MetadataExtractor().get_all_databases()
+        return _format_response(success=True, result=database_list)
+    except Exception as exc:
+        logger.error(f"MCP tool execution failed mcp_doris_get_db_list: {str(exc)}", exc_info=True)
+        return _format_response(success=False, error=str(exc), message="Error getting database list")
+
+async def mcp_doris_get_table_comment(table_name: str, db_name: str = None) -> Dict[str, Any]:
+    """Fetch the comment attached to a table as a formatted MCP response.
+
+    Args:
+        table_name: Table to look up; an empty value yields an error response.
+        db_name: Database holding the table; forwarded to MetadataExtractor.
+
+    Returns:
+        The ``_format_response`` envelope with the comment, or an error
+        envelope if the parameter is missing or the lookup raises.
+    """
+    logger.info(f"MCP Tool Call: mcp_doris_get_table_comment, Table: {table_name}, DB: {db_name}")
+    if not table_name:
+        return _format_response(success=False, error="Missing table_name parameter")
+    try:
+        meta = MetadataExtractor(db_name=db_name)
+        table_comment = meta.get_table_comment(table_name=table_name, db_name=db_name)
+        return _format_response(success=True, result=table_comment)
+    except Exception as exc:
+        logger.error(f"MCP tool execution failed mcp_doris_get_table_comment: {str(exc)}", exc_info=True)
+        return _format_response(success=False, error=str(exc), message="Error getting table comment")
+
+async def mcp_doris_get_table_column_comments(table_name: str, db_name: str = None) -> Dict[str, Any]:
+    """Fetch the column comments of a table as a formatted MCP response.
+
+    Args:
+        table_name: Table to look up; an empty value yields an error response.
+        db_name: Database holding the table; forwarded to MetadataExtractor.
+
+    Returns:
+        The ``_format_response`` envelope with the column comments, or an
+        error envelope if the parameter is missing or the lookup raises.
+    """
+    logger.info(f"MCP Tool Call: mcp_doris_get_table_column_comments, Table: {table_name}, DB: {db_name}")
+    if not table_name:
+        return _format_response(success=False, error="Missing table_name parameter")
+    try:
+        meta = MetadataExtractor(db_name=db_name)
+        column_comments = meta.get_column_comments(table_name=table_name, db_name=db_name)
+        return _format_response(success=True, result=column_comments)
+    except Exception as exc:
+        logger.error(f"MCP tool execution failed mcp_doris_get_table_column_comments: {str(exc)}", exc_info=True)
+        return _format_response(success=False, error=str(exc), message="Error getting column comments")
+
+async def mcp_doris_get_table_indexes(table_name: str, db_name: str = None) -> Dict[str, Any]:
+    """Fetch the index information of a table as a formatted MCP response.
+
+    Args:
+        table_name: Table to look up; an empty value yields an error response.
+        db_name: Database holding the table; forwarded to MetadataExtractor.
+
+    Returns:
+        The ``_format_response`` envelope with the index information, or an
+        error envelope if the parameter is missing or the lookup raises.
+    """
+    logger.info(f"MCP Tool Call: mcp_doris_get_table_indexes, Table: {table_name}, DB: {db_name}")
+    if not table_name:
+        return _format_response(success=False, error="Missing table_name parameter")
+    try:
+        meta = MetadataExtractor(db_name=db_name)
+        index_info = meta.get_table_indexes(table_name=table_name, db_name=db_name)
+        return _format_response(success=True, result=index_info)
+    except Exception as exc:
+        logger.error(f"MCP tool execution failed mcp_doris_get_table_indexes: {str(exc)}", exc_info=True)
+        return _format_response(success=False, error=str(exc), message="Error getting table indexes")
+
+async def mcp_doris_get_recent_audit_logs(days: int = 7, limit: int = 100) -> Dict[str, Any]:
+    """Fetch recent audit-log records as a formatted MCP response.
+
+    Args:
+        days: Look-back window in days, forwarded to MetadataExtractor.
+        limit: Maximum number of records, forwarded to MetadataExtractor.
+
+    Returns:
+        The ``_format_response`` envelope with the records, or an error
+        envelope if the lookup raises.
+    """
+    logger.info(f"MCP Tool Call: mcp_doris_get_recent_audit_logs, Days: {days}, Limit: {limit}")
+    try:
+        # NOTE(review): presumably a pandas DataFrame, which _format_response
+        # turns into row records - confirm against MetadataExtractor.
+        audit_records = MetadataExtractor().get_recent_audit_logs(days=days, limit=limit)
+        return _format_response(success=True, result=audit_records)
+    except Exception as exc:
+        logger.error(f"MCP tool execution failed mcp_doris_get_recent_audit_logs: {str(exc)}", exc_info=True)
+        return _format_response(success=False, error=str(exc), message="Error getting audit logs")
--- a/doris_mcp_server/tools/tool_initializer.py
+++ b/doris_mcp_server/tools/tool_initializer.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Tool Initialization Module
+
+Centralized initialization of all tools, ensuring they are correctly registered with MCP
+"""
+
+import logging
+import os
+from typing import List, Dict, Any, Optional
+import json
+from datetime import datetime
+import traceback
+
+# Import Context
+from mcp.server.fastmcp import Context
+
+# Import doris mcp tools
+from doris_mcp_server.tools.mcp_doris_tools import (
+    mcp_doris_exec_query,
+    mcp_doris_get_table_schema,
+    mcp_doris_get_db_table_list,
+    mcp_doris_get_db_list,
+    mcp_doris_get_table_comment,
+    mcp_doris_get_table_column_comments,
+    mcp_doris_get_table_indexes,
+    mcp_doris_get_recent_audit_logs
+)
+
+# Get logger
+logger = logging.getLogger("doris-mcp-tools-initializer")
+
+async def register_mcp_tools(mcp):
+    """Register every Doris tool wrapper on the given FastMCP instance.
+
+    Each wrapper is declared with ``@mcp.tool`` under its public tool name and
+    delegates to the matching ``mcp_doris_*`` implementation.
+
+    Args:
+        mcp: FastMCP instance
+
+    Returns:
+        True when registration completed, False if anything raised (the error
+        and its traceback are logged).
+    """
+    logger.info("Starting to register MCP tools...")
+
+    def _error_payload(error_text: str) -> Dict[str, Any]:
+        """Build a fresh failure envelope carrying ``error_text``."""
+        return {"content": [{"type": "text", "text": json.dumps({"success": False, "error": error_text})}]}
+
+    try:
+        # NOTE(review): every description below advertises a required
+        # `random_string` parameter that none of the wrapper signatures accept.
+
+        # Tool: exec_query - arguments arrive as named parameters, no ctx needed.
+        @mcp.tool("exec_query", description="""[Function Description]: Execute SQL query and return result command (executed by the client).\n
+[Parameter Content]:\n
+- random_string (string) [Required] - Unique identifier for the tool call\n
+- sql (string) [Required] - SQL statement to execute\n
+- db_name (string) [Optional] - Target database name, defaults to the current database\n
+- max_rows (integer) [Optional] - Maximum number of rows to return, default 100
+- timeout (integer) [Optional] - Query timeout in seconds, default 30""")
+        async def exec_query_tool(sql: str, db_name: str = None, max_rows: int = 100, timeout: int = 30) -> Dict[str, Any]:
+            """Wrapper: run a SQL query via mcp_doris_exec_query"""
+            return await mcp_doris_exec_query(sql=sql, db_name=db_name, max_rows=max_rows, timeout=timeout)
+
+        # Tool: get_table_schema
+        @mcp.tool("get_table_schema", description="""[Function Description]: Get detailed structure information of the specified table (columns, types, comments, etc.).\n
+[Parameter Content]:\n
+- random_string (string) [Required] - Unique identifier for the tool call\n
+- table_name (string) [Required] - Name of the table to query\n
+- db_name (string) [Optional] - Target database name, defaults to the current database\n""")
+        async def get_table_schema_tool(table_name: str, db_name: str = None) -> Dict[str, Any]:
+            """Wrapper: fetch a table schema"""
+            if table_name:
+                return await mcp_doris_get_table_schema(table_name=table_name, db_name=db_name)
+            return _error_payload("Missing table_name parameter")
+
+        # Tool: get_db_table_list
+        @mcp.tool("get_db_table_list", description="""[Function Description]: Get a list of all table names in the specified database.\n
+[Parameter Content]:\n
+- random_string (string) [Required] - Unique identifier for the tool call\n
+- db_name (string) [Optional] - Target database name, defaults to the current database\n""")
+        async def get_db_table_list_tool(db_name: str = None) -> Dict[str, Any]:
+            """Wrapper: list the tables of a database"""
+            return await mcp_doris_get_db_table_list(db_name=db_name)
+
+        # Tool: get_db_list - the wrapper deliberately takes no parameters.
+        @mcp.tool("get_db_list", description="""[Function Description]: Get a list of all database names on the server.\n
+[Parameter Content]:\n
+- random_string (string) [Required] - Unique identifier for the tool call\n""")
+        async def get_db_list_tool() -> Dict[str, Any]:
+            """Wrapper: list all databases"""
+            return await mcp_doris_get_db_list()
+
+        # Tool: get_table_comment
+        @mcp.tool("get_table_comment", description="""[Function Description]: Get the comment information for the specified table.\n
+[Parameter Content]:\n
+- random_string (string) [Required] - Unique identifier for the tool call\n
+- table_name (string) [Required] - Name of the table to query\n
+- db_name (string) [Optional] - Target database name, defaults to the current database\n""")
+        async def get_table_comment_tool(table_name: str, db_name: str = None) -> Dict[str, Any]:
+            """Wrapper: fetch a table comment"""
+            if table_name:
+                return await mcp_doris_get_table_comment(table_name=table_name, db_name=db_name)
+            return _error_payload("Missing table_name parameter")
+
+        # Tool: get_table_column_comments
+        @mcp.tool("get_table_column_comments", description="""[Function Description]: Get comment information for all columns in the specified table.\n
+[Parameter Content]:\n
+- random_string (string) [Required] - Unique identifier for the tool call\n
+- table_name (string) [Required] - Name of the table to query\n
+- db_name (string) [Optional] - Target database name, defaults to the current database\n""")
+        async def get_table_column_comments_tool(table_name: str, db_name: str = None) -> Dict[str, Any]:
+            """Wrapper: fetch the column comments of a table"""
+            if table_name:
+                return await mcp_doris_get_table_column_comments(table_name=table_name, db_name=db_name)
+            return _error_payload("Missing table_name parameter")
+
+        # Tool: get_table_indexes
+        @mcp.tool("get_table_indexes", description="""[Function Description]: Get index information for the specified table.\n
+[Parameter Content]:\n
+- random_string (string) [Required] - Unique identifier for the tool call\n
+- table_name (string) [Required] - Name of the table to query\n
+- db_name (string) [Optional] - Target database name, defaults to the current database\n""")
+        async def get_table_indexes_tool(table_name: str, db_name: str = None) -> Dict[str, Any]:
+            """Wrapper: fetch the indexes of a table"""
+            if table_name:
+                return await mcp_doris_get_table_indexes(table_name=table_name, db_name=db_name)
+            return _error_payload("Missing table_name parameter")
+
+        # Tool: get_recent_audit_logs
+        @mcp.tool("get_recent_audit_logs", description="""[Function Description]: Get audit log records for a recent period.\n
+[Parameter Content]:\n
+- random_string (string) [Required] - Unique identifier for the tool call\n
+- days (integer) [Optional] - Number of recent days of logs to retrieve, default is 7\n
+- limit (integer) [Optional] - Maximum number of records to return, default is 100\n""")
+        async def get_recent_audit_logs_tool(days: int = 7, limit: int = 100) -> Dict[str, Any]:
+            """Wrapper: fetch recent audit logs"""
+            # Coerce both arguments up front so a non-numeric value is answered
+            # with a plain error envelope instead of reaching the implementation.
+            try:
+                day_count, row_limit = int(days), int(limit)
+            except (ValueError, TypeError):
+                return _error_payload("days and limit parameters must be integers")
+            return await mcp_doris_get_recent_audit_logs(days=day_count, limit=row_limit)
+
+        # Ask the server what it now exposes, purely for the log line.
+        registered = await mcp.list_tools()
+        logger.info(f"Registered all MCP tools, total {len(registered)} tools")
+        return True
+    except Exception as exc:
+        logger.error(f"Error registering MCP tools: {str(exc)}")
+        logger.error(traceback.format_exc())
+        return False