init doris mcp 0.2.0

This commit is contained in:
Yijia Su
2025-05-06 12:56:55 +08:00
parent 9dc25be87a
commit c190f19cb5
23 changed files with 6405 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
from .mcp_doris_tools import (
mcp_doris_exec_query,
mcp_doris_get_table_schema,
mcp_doris_get_db_table_list,
mcp_doris_get_db_list,
mcp_doris_get_table_comment,
mcp_doris_get_table_column_comments,
mcp_doris_get_table_indexes,
mcp_doris_get_recent_audit_logs
)
# __all__ must only name attributes that really exist in this package:
# `from <package> import *` looks every entry up on the module and raises
# AttributeError for a missing one. The shorter tool names (without the
# "mcp_doris_" prefix) are not defined here, so the imported implementation
# function names are exported instead.
__all__ = [
    "mcp_doris_exec_query",
    "mcp_doris_get_table_schema",
    "mcp_doris_get_db_table_list",
    "mcp_doris_get_db_list",
    "mcp_doris_get_table_comment",
    "mcp_doris_get_table_column_comments",
    "mcp_doris_get_table_indexes",
    "mcp_doris_get_recent_audit_logs",
]

View File

@@ -0,0 +1,202 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Doris MCP Tool Implementations
Includes exec_query and new tools based on schema_extractor.
"""
import os
import time
import json
import logging
from typing import Dict, Any
import pandas as pd
# --- Use absolute imports ---
from doris_mcp_server.utils.schema_extractor import MetadataExtractor
from doris_mcp_server.utils.sql_executor_tools import execute_sql_query
# Get logger
logger = logging.getLogger("doris-mcp-tools")
# --- Helper Function to format response ---
def _format_response(success: bool, result: Any = None, error: str = None, message: str = "") -> Dict[str, Any]:
    """Build the MCP text-content envelope shared by the tool functions.

    The returned dict has the shape ``{"content": [{"type": "text", "text": <json>}]}``
    where ``<json>`` is a JSON document with at least ``success`` and ``timestamp``.

    Args:
        success: Whether the operation succeeded.
        result: Payload for a successful call. A pandas DataFrame is converted
            to a list of row records; anything else is embedded as-is.
        error: Error text for a failed call; defaults to "Unknown error".
        message: Optional human-readable message; a generic default is used
            when empty.

    Returns:
        Dict[str, Any]: The MCP envelope described above.

    Notes:
        * A successful call always carries ``result`` (possibly ``null``) and
          ``message``, even when ``result`` is None.
        * If a DataFrame cannot be serialized, the response is downgraded to a
          failure and its message says so, instead of claiming success.
    """
    payload: Dict[str, Any] = {
        "success": success,
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }

    if not success:
        payload["error"] = error or "Unknown error"
        payload["message"] = message or "Operation failed"
    elif isinstance(result, pd.DataFrame):
        try:
            # to_json handles dates/NaN for us; parse it back so the records
            # are embedded as real JSON rather than as a nested string.
            payload["result"] = json.loads(result.to_json(orient="records", date_format="iso"))
            payload["message"] = message or "Operation successful"
        except Exception as df_err:
            logger.error(f"DataFrame to JSON conversion failed: {df_err}")
            # Serialization failure turns the whole response into an error;
            # the caller's success message no longer applies.
            payload["success"] = False
            payload["result"] = {"error": "Failed to serialize DataFrame result"}
            payload["error"] = f"DataFrame serialization error: {str(df_err)}"
            payload["message"] = "Operation failed"
    else:
        payload["result"] = result
        payload["message"] = message or "Operation successful"

    return {
        "content": [
            {
                "type": "text",
                # default=str keeps non-JSON-native values (dates, Decimals, ...)
                # from aborting the dump.
                "text": json.dumps(payload, ensure_ascii=False, default=str),
            }
        ]
    }
async def mcp_doris_exec_query(sql: str = None, db_name: str = None, max_rows: int = 100, timeout: int = 30) -> Dict[str, Any]:
    """
    Executes an SQL query and returns the result.

    Args:
        sql (str): The SQL query to execute. Missing, empty or whitespace-only
            statements are rejected before the executor is called.
        db_name (str, optional): Target database name, forwarded to the executor.
        max_rows (int, optional): Maximum number of rows to return. Defaults to 100.
        timeout (int, optional): Query timeout in seconds. Defaults to 30.

    Returns:
        Dict[str, Any]: An MCP text-content envelope. On the normal path the
        embedded JSON is the executor's own payload, re-serialized as-is (it is
        NOT wrapped again by _format_response). Validation and processing
        errors are reported through _format_response.
    """
    logger.info(f"MCP Tool Call: mcp_doris_exec_query, SQL: {sql}, DB: {db_name}, MaxRows: {max_rows}, Timeout: {timeout}")
    try:
        # A statement consisting only of whitespace is as unusable as a missing one.
        if not sql or not sql.strip():
            return _format_response(success=False, error="SQL statement not provided", message="Please provide the SQL statement to execute")

        # execute_sql_query takes a context dict with the arguments under "params".
        exec_ctx = {
            "params": {
                "sql": sql,
                "db_name": db_name,
                "max_rows": max_rows,
                "timeout": timeout,
            }
        }
        exec_result = await execute_sql_query(exec_ctx)

        # Expected shape: {'content': [{'type': 'text', 'text': json_string}]}.
        # Any deviation (None, missing key, empty list, wrong type) is reported
        # as an invalid executor response.
        try:
            raw_text = exec_result['content'][0]['text']
        except (KeyError, IndexError, TypeError):
            logger.error(f"execute_sql_query returned an unexpected format: {exec_result}")
            return _format_response(success=False, error="SQL executor returned invalid format", message="Internal error executing SQL query")

        try:
            # Round-trip through json so the payload is validated and then
            # emitted with this module's dump settings.
            result_data = json.loads(raw_text)
            return {
                "content": [
                    {
                        "type": "text",
                        "text": json.dumps(result_data, ensure_ascii=False, default=str),
                    }
                ]
            }
        except json.JSONDecodeError as json_err:
            logger.error(f"Failed to parse execute_sql_query result: {json_err}")
            return _format_response(success=False, error=str(json_err), message="Error parsing SQL execution result")
        except Exception as parse_err:
            logger.error(f"Unexpected error occurred while processing execute_sql_query result: {parse_err}", exc_info=True)
            return _format_response(success=False, error=str(parse_err), message="Unknown error occurred while processing SQL execution result")
    except Exception as e:
        # Tool boundary: never let an exception escape to the MCP layer.
        logger.error(f"MCP tool execution failed mcp_doris_exec_query: {str(e)}", exc_info=True)
        return _format_response(success=False, error=str(e), message="Error executing SQL query")
async def mcp_doris_get_table_schema(table_name: str, db_name: str = None) -> Dict[str, Any]:
    """Fetch the schema of one table and wrap it in an MCP response.

    Args:
        table_name: Table to inspect; a falsy value yields an error response.
        db_name: Optional database name, passed to MetadataExtractor and to
            its get_table_schema call.

    Returns:
        Dict[str, Any]: Envelope built by _format_response. An empty/falsy
        schema is reported as "Table not found or has no columns".
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_table_schema, Table: {table_name}, DB: {db_name}")

    # Guard clause: nothing to look up without a table name.
    if not table_name:
        return _format_response(success=False, error="Missing table_name parameter")

    try:
        metadata = MetadataExtractor(db_name=db_name)
        table_schema = metadata.get_table_schema(table_name=table_name, db_name=db_name)
        if table_schema:
            return _format_response(success=True, result=table_schema)

        # Fall back to the extractor's own database name for the message
        # when the caller did not name one.
        effective_db = db_name or metadata.db_name
        return _format_response(
            success=False,
            error="Table not found or has no columns",
            message=f"Could not get schema for table {effective_db}.{table_name}",
        )
    except Exception as exc:
        logger.exception(f"MCP tool execution failed mcp_doris_get_table_schema: {exc!s}")
        return _format_response(success=False, error=str(exc), message="Error getting table schema")
async def mcp_doris_get_db_table_list(db_name: str = None) -> Dict[str, Any]:
    """List the tables of a database as an MCP response.

    Args:
        db_name: Optional database name, passed to MetadataExtractor and to
            its get_database_tables call.

    Returns:
        Dict[str, Any]: Envelope built by _format_response carrying whatever
        get_database_tables returned, or an error envelope on any exception.
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_db_table_list, DB: {db_name}")
    try:
        metadata = MetadataExtractor(db_name=db_name)
        table_names = metadata.get_database_tables(db_name=db_name)
        return _format_response(success=True, result=table_names)
    except Exception as exc:
        logger.exception(f"MCP tool execution failed mcp_doris_get_db_table_list: {exc!s}")
        return _format_response(
            success=False,
            error=str(exc),
            message="Error getting database table list",
        )
async def mcp_doris_get_db_list() -> Dict[str, Any]:
    """List all databases known to a default-constructed MetadataExtractor.

    Returns:
        Dict[str, Any]: Envelope built by _format_response carrying whatever
        get_all_databases returned, or an error envelope on any exception.
    """
    logger.info("MCP Tool Call: mcp_doris_get_db_list")
    try:
        database_names = MetadataExtractor().get_all_databases()
        return _format_response(success=True, result=database_names)
    except Exception as exc:
        logger.exception(f"MCP tool execution failed mcp_doris_get_db_list: {exc!s}")
        return _format_response(
            success=False,
            error=str(exc),
            message="Error getting database list",
        )
async def mcp_doris_get_table_comment(table_name: str, db_name: str = None) -> Dict[str, Any]:
    """Fetch the comment attached to a table as an MCP response.

    Args:
        table_name: Table to inspect; a falsy value yields an error response.
        db_name: Optional database name, passed to MetadataExtractor and to
            its get_table_comment call.

    Returns:
        Dict[str, Any]: Envelope built by _format_response carrying whatever
        get_table_comment returned, or an error envelope on any exception.
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_table_comment, Table: {table_name}, DB: {db_name}")

    # Guard clause: nothing to look up without a table name.
    if not table_name:
        return _format_response(success=False, error="Missing table_name parameter")

    try:
        metadata = MetadataExtractor(db_name=db_name)
        table_comment = metadata.get_table_comment(table_name=table_name, db_name=db_name)
        return _format_response(success=True, result=table_comment)
    except Exception as exc:
        logger.exception(f"MCP tool execution failed mcp_doris_get_table_comment: {exc!s}")
        return _format_response(
            success=False,
            error=str(exc),
            message="Error getting table comment",
        )
async def mcp_doris_get_table_column_comments(table_name: str, db_name: str = None) -> Dict[str, Any]:
    """Fetch the per-column comments of a table as an MCP response.

    Args:
        table_name: Table to inspect; a falsy value yields an error response.
        db_name: Optional database name, passed to MetadataExtractor and to
            its get_column_comments call.

    Returns:
        Dict[str, Any]: Envelope built by _format_response carrying whatever
        get_column_comments returned, or an error envelope on any exception.
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_table_column_comments, Table: {table_name}, DB: {db_name}")

    # Guard clause: nothing to look up without a table name.
    if not table_name:
        return _format_response(success=False, error="Missing table_name parameter")

    try:
        metadata = MetadataExtractor(db_name=db_name)
        column_comments = metadata.get_column_comments(table_name=table_name, db_name=db_name)
        return _format_response(success=True, result=column_comments)
    except Exception as exc:
        logger.exception(f"MCP tool execution failed mcp_doris_get_table_column_comments: {exc!s}")
        return _format_response(
            success=False,
            error=str(exc),
            message="Error getting column comments",
        )
async def mcp_doris_get_table_indexes(table_name: str, db_name: str = None) -> Dict[str, Any]:
    """Fetch the index information of a table as an MCP response.

    Args:
        table_name: Table to inspect; a falsy value yields an error response.
        db_name: Optional database name, passed to MetadataExtractor and to
            its get_table_indexes call.

    Returns:
        Dict[str, Any]: Envelope built by _format_response carrying whatever
        get_table_indexes returned, or an error envelope on any exception.
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_table_indexes, Table: {table_name}, DB: {db_name}")

    # Guard clause: nothing to look up without a table name.
    if not table_name:
        return _format_response(success=False, error="Missing table_name parameter")

    try:
        metadata = MetadataExtractor(db_name=db_name)
        index_info = metadata.get_table_indexes(table_name=table_name, db_name=db_name)
        return _format_response(success=True, result=index_info)
    except Exception as exc:
        logger.exception(f"MCP tool execution failed mcp_doris_get_table_indexes: {exc!s}")
        return _format_response(
            success=False,
            error=str(exc),
            message="Error getting table indexes",
        )
async def mcp_doris_get_recent_audit_logs(days: int = 7, limit: int = 100) -> Dict[str, Any]:
    """Fetch recent audit log records as an MCP response.

    Args:
        days: Look-back window, forwarded to get_recent_audit_logs. Defaults to 7.
        limit: Record cap, forwarded to get_recent_audit_logs. Defaults to 100.

    Returns:
        Dict[str, Any]: Envelope built by _format_response, or an error
        envelope on any exception.
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_recent_audit_logs, Days: {days}, Limit: {limit}")
    try:
        # Presumably a pandas DataFrame (TODO confirm against MetadataExtractor);
        # _format_response converts DataFrames to row records.
        audit_logs = MetadataExtractor().get_recent_audit_logs(days=days, limit=limit)
        return _format_response(success=True, result=audit_logs)
    except Exception as exc:
        logger.exception(f"MCP tool execution failed mcp_doris_get_recent_audit_logs: {exc!s}")
        return _format_response(
            success=False,
            error=str(exc),
            message="Error getting audit logs",
        )

View File

@@ -0,0 +1,141 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Tool Initialization Module
Centralized initialization of all tools, ensuring they are correctly registered with MCP
"""
import logging
import os
from typing import List, Dict, Any, Optional
import json
from datetime import datetime
import traceback
# Import Context
from mcp.server.fastmcp import Context
# Import doris mcp tools
from doris_mcp_server.tools.mcp_doris_tools import (
mcp_doris_exec_query,
mcp_doris_get_table_schema,
mcp_doris_get_db_table_list,
mcp_doris_get_db_list,
mcp_doris_get_table_comment,
mcp_doris_get_table_column_comments,
mcp_doris_get_table_indexes,
mcp_doris_get_recent_audit_logs
)
# Get logger
logger = logging.getLogger("doris-mcp-tools-initializer")
async def register_mcp_tools(mcp):
    """Register the Doris tool wrappers on an MCP server instance.

    Each wrapper is a thin async function decorated with ``mcp.tool(name,
    description=...)`` that forwards its named arguments to the matching
    ``mcp_doris_*`` implementation. The descriptions list exactly the
    parameters the wrapper signatures accept.

    Args:
        mcp: FastMCP instance; must provide a ``tool`` decorator factory and
            an awaitable ``list_tools()``.

    Returns:
        bool: True when every tool was registered, False if registration
        raised (the exception is logged with its traceback).
    """
    logger.info("Starting to register MCP tools...")
    try:
        # Register Tool: Execute SQL Query
        @mcp.tool("exec_query", description="""[Function Description]: Execute SQL query and return result command (executed by the client).\n
[Parameter Content]:\n
- sql (string) [Required] - SQL statement to execute\n
- db_name (string) [Optional] - Target database name, defaults to the current database\n
- max_rows (integer) [Optional] - Maximum number of rows to return, default 100\n
- timeout (integer) [Optional] - Query timeout in seconds, default 30""")
        async def exec_query_tool(sql: str, db_name: str = None, max_rows: int = 100, timeout: int = 30) -> Dict[str, Any]:
            """Wrapper: Execute SQL query and return result command"""
            return await mcp_doris_exec_query(sql=sql, db_name=db_name, max_rows=max_rows, timeout=timeout)

        # Register Tool: Get Table Schema
        # An empty table_name is rejected by the delegated function, so the
        # wrapper does not repeat the check.
        @mcp.tool("get_table_schema", description="""[Function Description]: Get detailed structure information of the specified table (columns, types, comments, etc.).\n
[Parameter Content]:\n
- table_name (string) [Required] - Name of the table to query\n
- db_name (string) [Optional] - Target database name, defaults to the current database\n""")
        async def get_table_schema_tool(table_name: str, db_name: str = None) -> Dict[str, Any]:
            """Wrapper: Get table schema"""
            return await mcp_doris_get_table_schema(table_name=table_name, db_name=db_name)

        # Register Tool: Get Database Table List
        @mcp.tool("get_db_table_list", description="""[Function Description]: Get a list of all table names in the specified database.\n
[Parameter Content]:\n
- db_name (string) [Optional] - Target database name, defaults to the current database\n""")
        async def get_db_table_list_tool(db_name: str = None) -> Dict[str, Any]:
            """Wrapper: Get database table list"""
            return await mcp_doris_get_db_table_list(db_name=db_name)

        # Register Tool: Get Database List
        @mcp.tool("get_db_list", description="""[Function Description]: Get a list of all database names on the server.\n
[Parameter Content]:\n
- (none) This tool takes no parameters\n""")
        async def get_db_list_tool() -> Dict[str, Any]:
            """Wrapper: Get database list"""
            return await mcp_doris_get_db_list()

        # Register Tool: Get Table Comment
        @mcp.tool("get_table_comment", description="""[Function Description]: Get the comment information for the specified table.\n
[Parameter Content]:\n
- table_name (string) [Required] - Name of the table to query\n
- db_name (string) [Optional] - Target database name, defaults to the current database\n""")
        async def get_table_comment_tool(table_name: str, db_name: str = None) -> Dict[str, Any]:
            """Wrapper: Get table comment"""
            return await mcp_doris_get_table_comment(table_name=table_name, db_name=db_name)

        # Register Tool: Get Table Column Comments
        @mcp.tool("get_table_column_comments", description="""[Function Description]: Get comment information for all columns in the specified table.\n
[Parameter Content]:\n
- table_name (string) [Required] - Name of the table to query\n
- db_name (string) [Optional] - Target database name, defaults to the current database\n""")
        async def get_table_column_comments_tool(table_name: str, db_name: str = None) -> Dict[str, Any]:
            """Wrapper: Get table column comments"""
            return await mcp_doris_get_table_column_comments(table_name=table_name, db_name=db_name)

        # Register Tool: Get Table Indexes
        @mcp.tool("get_table_indexes", description="""[Function Description]: Get index information for the specified table.\n
[Parameter Content]:\n
- table_name (string) [Required] - Name of the table to query\n
- db_name (string) [Optional] - Target database name, defaults to the current database\n""")
        async def get_table_indexes_tool(table_name: str, db_name: str = None) -> Dict[str, Any]:
            """Wrapper: Get table indexes"""
            return await mcp_doris_get_table_indexes(table_name=table_name, db_name=db_name)

        # Register Tool: Get Recent Audit Logs
        @mcp.tool("get_recent_audit_logs", description="""[Function Description]: Get audit log records for a recent period.\n
[Parameter Content]:\n
- days (integer) [Optional] - Number of recent days of logs to retrieve, default is 7\n
- limit (integer) [Optional] - Maximum number of records to return, default is 100\n""")
        async def get_recent_audit_logs_tool(days: int = 7, limit: int = 100) -> Dict[str, Any]:
            """Wrapper: Get recent audit logs"""
            # Coerce defensively: the values may arrive as numeric strings.
            try:
                days = int(days)
                limit = int(limit)
            except (ValueError, TypeError):
                return {"content": [{"type": "text", "text": json.dumps({"success": False, "error": "days and limit parameters must be integers"})}]}
            return await mcp_doris_get_recent_audit_logs(days=days, limit=limit)

        # Report how many tools the server now exposes.
        tools_count = len(await mcp.list_tools())
        logger.info(f"Registered all MCP tools, total {tools_count} tools")
        return True
    except Exception as e:
        # logger.exception records the message and the traceback in one entry.
        logger.exception(f"Error registering MCP tools: {str(e)}")
        return False