0.3.0 Release Version

This commit is contained in:
FreeOnePlus
2025-06-08 18:44:40 +08:00
parent d9fed06c92
commit 4c913743c7
54 changed files with 12649 additions and 4667 deletions

View File

@@ -1,25 +1,9 @@
from .mcp_doris_tools import (
mcp_doris_exec_query,
mcp_doris_get_table_schema,
mcp_doris_get_db_table_list,
mcp_doris_get_db_list,
mcp_doris_get_table_comment,
mcp_doris_get_table_column_comments,
mcp_doris_get_table_indexes,
mcp_doris_get_recent_audit_logs,
mcp_doris_get_catalog_list
)
"""
MCP Tools Package - Contains all MCP tool implementations.
# The __all__ list should reflect the registered tool names,
# even though the implementation functions have the prefix.
__all__ = [
"exec_query",
"get_table_schema",
"get_db_table_list",
"get_db_list",
"get_table_comment",
"get_table_column_comments",
"get_table_indexes",
"get_recent_audit_logs",
"get_catalog_list"
]
This package includes:
- Doris database tools
- Resource managers
- Prompt managers
- Tool registration and initialization
"""

View File

@@ -1,230 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Doris MCP Tool Implementations
Includes exec_query and new tools based on schema_extractor.
"""
import os
import time
import json
import logging
from typing import Dict, Any
import pandas as pd
# --- Use absolute imports ---
from doris_mcp_server.utils.schema_extractor import MetadataExtractor
from doris_mcp_server.utils.sql_executor_tools import execute_sql_query
# Get logger
logger = logging.getLogger("doris-mcp-tools")
# --- Helper Function to format response ---
def _format_response(success: bool, result: Any = None, error: str = None, message: str = "") -> Dict[str, Any]:
    """Build the MCP text-content envelope for a tool outcome.

    The outcome is serialised to JSON and wrapped as
    ``{"content": [{"type": "text", "text": <json>}]}``.

    Args:
        success: Whether the operation succeeded.
        result: Payload of a successful call. A ``pandas.DataFrame`` is
            converted to a list of row records; any other value is handed to
            ``json.dumps`` unchanged (non-serialisable values fall back to
            ``str``). ``None`` means "no payload" and omits the ``result`` key.
        error: Error text of a failed call; defaults to "Unknown error".
        message: Human-readable summary; a generic default is used when empty.

    Returns:
        The envelope dictionary described above. The inner JSON always carries
        ``success``, ``timestamp`` and ``message``.
    """
    response_data: Dict[str, Any] = {
        "success": success,
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }
    serialization_failed = False

    if success and result is not None:
        if isinstance(result, pd.DataFrame):
            try:
                # Round-trip through to_json so dates/NaN become JSON-safe values.
                response_data["result"] = json.loads(
                    result.to_json(orient="records", date_format="iso")
                )
            except Exception as df_err:
                logger.error(f"DataFrame to JSON conversion failed: {df_err}")
                serialization_failed = True
                response_data["result"] = {"error": "Failed to serialize DataFrame result"}
                response_data["success"] = False
                response_data["error"] = f"DataFrame serialization error: {str(df_err)}"
        else:
            response_data["result"] = result

    if not success:
        response_data["error"] = error or "Unknown error"
        response_data["message"] = message or "Operation failed"
    elif serialization_failed:
        # The caller's message describes a success that did not happen; do not
        # pair it with success=False.
        response_data["message"] = "Operation failed"
    else:
        # Set the message even when there is no payload (result is None), so a
        # caller-supplied message is never silently dropped.
        response_data["message"] = message or "Operation successful"

    return {
        "content": [
            {
                "type": "text",
                # default=str keeps the envelope serialisable for values such as
                # datetime or Decimal.
                "text": json.dumps(response_data, ensure_ascii=False, default=str),
            }
        ]
    }
async def mcp_doris_exec_query(sql: str = None, db_name: str = None, catalog_name: str = None, max_rows: int = 100, timeout: int = 30) -> Dict[str, Any]:
    """
    Run one SQL statement through ``execute_sql_query`` and relay its result.

    Args:
        sql (str): The SQL query to execute. Table references MUST use
            three-part naming:
            - Internal tables: internal.db_name.table_name
              (e.g., "SELECT * FROM internal.ssb.customer")
            - External tables: catalog_name.db_name.table_name
              (e.g., "SELECT * FROM mysql.ssb.customer")
            - Cross-catalog queries:
              "SELECT * FROM mysql.ssb.customer m JOIN internal.ssb.orders o ON m.id = o.customer_id"
        db_name (str, optional): Target database name, forwarded as connection
            context; table names in the SQL must still be fully qualified.
        catalog_name (str, optional): Reference catalog name, forwarded as
            context only. Available catalogs can be listed with the
            get_catalog_list tool.
        max_rows (int, optional): Maximum number of rows to return. Defaults to 100.
        timeout (int, optional): Query timeout in seconds. Defaults to 30.

    Returns:
        Dict[str, Any]: MCP text-content envelope. On the normal path the text
        is the executor's own JSON re-serialised as-is (it is NOT re-wrapped by
        ``_format_response``); every error path goes through ``_format_response``.
    """
    logger.info(f"MCP Tool Call: mcp_doris_exec_query, SQL: {sql}, DB: {db_name}, Catalog: {catalog_name}, MaxRows: {max_rows}, Timeout: {timeout}")
    try:
        if not sql:
            return _format_response(success=False, error="SQL statement not provided", message="Please provide the SQL statement to execute")

        # execute_sql_query expects its arguments under a "params" key.
        request = {
            "params": dict(
                sql=sql,
                db_name=db_name,
                catalog_name=catalog_name,
                max_rows=max_rows,
                timeout=timeout,
            )
        }
        raw = await execute_sql_query(request)

        # Expected shape: {'content': [{'type': 'text', 'text': json_string}]}
        has_text = raw and 'content' in raw and len(raw['content']) > 0 and 'text' in raw['content'][0]
        if not has_text:
            logger.error(f"execute_sql_query returned an unexpected format: {raw}")
            return _format_response(success=False, error="SQL executor returned invalid format", message="Internal error executing SQL query")

        try:
            # Decode and re-encode so the payload is normalised by json.dumps
            # (ensure_ascii=False, default=str) before being handed back.
            parsed = json.loads(raw['content'][0]['text'])
            envelope_text = json.dumps(parsed, ensure_ascii=False, default=str)
            return {"content": [{"type": "text", "text": envelope_text}]}
        except json.JSONDecodeError as json_err:
            logger.error(f"Failed to parse execute_sql_query result: {json_err}")
            return _format_response(success=False, error=str(json_err), message="Error parsing SQL execution result")
        except Exception as parse_err:
            logger.error(f"Unexpected error occurred while processing execute_sql_query result: {parse_err}", exc_info=True)
            return _format_response(success=False, error=str(parse_err), message="Unknown error occurred while processing SQL execution result")
    except Exception as exc:
        logger.error(f"MCP tool execution failed mcp_doris_exec_query: {str(exc)}", exc_info=True)
        return _format_response(success=False, error=str(exc), message="Error executing SQL query")
async def mcp_doris_get_table_schema(table_name: str, db_name: str = None, catalog_name: str = None) -> Dict[str, Any]:
    """Fetch a table's schema via ``MetadataExtractor.get_table_schema``.

    Args:
        table_name: Table to describe; an empty value yields an error response.
        db_name: Optional database name passed to the extractor.
        catalog_name: Optional catalog name passed to the extractor.

    Returns:
        ``_format_response`` envelope holding the schema, or an error when the
        extractor returns nothing or raises.
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_table_schema, Table: {table_name}, DB: {db_name}, Catalog: {catalog_name}")
    if not table_name:
        return _format_response(success=False, error="Missing table_name parameter")

    scope = {"db_name": db_name, "catalog_name": catalog_name}
    try:
        extractor = MetadataExtractor(**scope)
        schema = extractor.get_table_schema(table_name=table_name, **scope)
        if schema:
            return _format_response(success=True, result=schema)
        # Empty schema: report which fully-qualified table was looked up.
        return _format_response(
            success=False,
            error="Table not found or has no columns",
            message=f"Could not get schema for table {catalog_name or 'default'}.{db_name or extractor.db_name}.{table_name}",
        )
    except Exception as exc:
        logger.error(f"MCP tool execution failed mcp_doris_get_table_schema: {str(exc)}", exc_info=True)
        return _format_response(success=False, error=str(exc), message="Error getting table schema")
async def mcp_doris_get_db_table_list(db_name: str = None, catalog_name: str = None) -> Dict[str, Any]:
    """List the tables of a database via ``MetadataExtractor.get_database_tables``.

    Args:
        db_name: Optional database name passed to the extractor.
        catalog_name: Optional catalog name passed to the extractor.

    Returns:
        ``_format_response`` envelope with the table list, or an error envelope
        if the extractor raises.
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_db_table_list, DB: {db_name}, Catalog: {catalog_name}")
    scope = {"db_name": db_name, "catalog_name": catalog_name}
    try:
        table_names = MetadataExtractor(**scope).get_database_tables(**scope)
        return _format_response(success=True, result=table_names)
    except Exception as exc:
        logger.error(f"MCP tool execution failed mcp_doris_get_db_table_list: {str(exc)}", exc_info=True)
        return _format_response(success=False, error=str(exc), message="Error getting database table list")
async def mcp_doris_get_db_list(catalog_name: str = None) -> Dict[str, Any]:
    """List databases via ``MetadataExtractor.get_all_databases``.

    Args:
        catalog_name: Optional catalog name passed to the extractor.

    Returns:
        ``_format_response`` envelope with the database list, or an error
        envelope if the extractor raises.
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_db_list, Catalog: {catalog_name}")
    try:
        db_names = MetadataExtractor(catalog_name=catalog_name).get_all_databases(catalog_name=catalog_name)
        return _format_response(success=True, result=db_names)
    except Exception as exc:
        logger.error(f"MCP tool execution failed mcp_doris_get_db_list: {str(exc)}", exc_info=True)
        return _format_response(success=False, error=str(exc), message="Error getting database list")
async def mcp_doris_get_table_comment(table_name: str, db_name: str = None, catalog_name: str = None) -> Dict[str, Any]:
    """Fetch a table's comment via ``MetadataExtractor.get_table_comment``.

    Args:
        table_name: Table to look up; an empty value yields an error response.
        db_name: Optional database name passed to the extractor.
        catalog_name: Optional catalog name passed to the extractor.

    Returns:
        ``_format_response`` envelope with the comment, or an error envelope.
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_table_comment, Table: {table_name}, DB: {db_name}, Catalog: {catalog_name}")
    if not table_name:
        return _format_response(success=False, error="Missing table_name parameter")

    scope = {"db_name": db_name, "catalog_name": catalog_name}
    try:
        table_comment = MetadataExtractor(**scope).get_table_comment(table_name=table_name, **scope)
        return _format_response(success=True, result=table_comment)
    except Exception as exc:
        logger.error(f"MCP tool execution failed mcp_doris_get_table_comment: {str(exc)}", exc_info=True)
        return _format_response(success=False, error=str(exc), message="Error getting table comment")
async def mcp_doris_get_table_column_comments(table_name: str, db_name: str = None, catalog_name: str = None) -> Dict[str, Any]:
    """Fetch per-column comments via ``MetadataExtractor.get_column_comments``.

    Args:
        table_name: Table to look up; an empty value yields an error response.
        db_name: Optional database name passed to the extractor.
        catalog_name: Optional catalog name passed to the extractor.

    Returns:
        ``_format_response`` envelope with the comments, or an error envelope.
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_table_column_comments, Table: {table_name}, DB: {db_name}, Catalog: {catalog_name}")
    if not table_name:
        return _format_response(success=False, error="Missing table_name parameter")

    scope = {"db_name": db_name, "catalog_name": catalog_name}
    try:
        column_comments = MetadataExtractor(**scope).get_column_comments(table_name=table_name, **scope)
        return _format_response(success=True, result=column_comments)
    except Exception as exc:
        logger.error(f"MCP tool execution failed mcp_doris_get_table_column_comments: {str(exc)}", exc_info=True)
        return _format_response(success=False, error=str(exc), message="Error getting column comments")
async def mcp_doris_get_table_indexes(table_name: str, db_name: str = None, catalog_name: str = None) -> Dict[str, Any]:
    """Fetch a table's index information via ``MetadataExtractor.get_table_indexes``.

    Args:
        table_name: Table to look up; an empty value yields an error response.
        db_name: Optional database name passed to the extractor.
        catalog_name: Optional catalog name passed to the extractor.

    Returns:
        ``_format_response`` envelope with the indexes, or an error envelope.
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_table_indexes, Table: {table_name}, DB: {db_name}, Catalog: {catalog_name}")
    if not table_name:
        return _format_response(success=False, error="Missing table_name parameter")

    scope = {"db_name": db_name, "catalog_name": catalog_name}
    try:
        index_info = MetadataExtractor(**scope).get_table_indexes(table_name=table_name, **scope)
        return _format_response(success=True, result=index_info)
    except Exception as exc:
        logger.error(f"MCP tool execution failed mcp_doris_get_table_indexes: {str(exc)}", exc_info=True)
        return _format_response(success=False, error=str(exc), message="Error getting table indexes")
async def mcp_doris_get_recent_audit_logs(days: int = 7, limit: int = 100) -> Dict[str, Any]:
    """Fetch recent audit logs via ``MetadataExtractor.get_recent_audit_logs``.

    Args:
        days: Look-back window forwarded to the extractor. Defaults to 7.
        limit: Row cap forwarded to the extractor. Defaults to 100.

    Returns:
        ``_format_response`` envelope with the logs, or an error envelope.
        NOTE(review): the result is presumably a DataFrame (the original local
        was named ``logs_df``) - confirm against MetadataExtractor.
    """
    logger.info(f"MCP Tool Call: mcp_doris_get_recent_audit_logs, Days: {days}, Limit: {limit}")
    try:
        audit_logs = MetadataExtractor().get_recent_audit_logs(days=days, limit=limit)
        return _format_response(success=True, result=audit_logs)
    except Exception as exc:
        logger.error(f"MCP tool execution failed mcp_doris_get_recent_audit_logs: {str(exc)}", exc_info=True)
        return _format_response(success=False, error=str(exc), message="Error getting audit logs")
async def mcp_doris_get_catalog_list() -> Dict[str, Any]:
    """
    List Doris catalogs via ``MetadataExtractor.get_catalog_list``.

    Returns:
        Dict[str, Any]: ``_format_response`` envelope containing the catalog
        list, or error information if the extractor raises.
    """
    logger.info("MCP Tool Call: mcp_doris_get_catalog_list")
    try:
        catalog_names = MetadataExtractor().get_catalog_list()
        return _format_response(success=True, result=catalog_names, message="Successfully retrieved catalog list")
    except Exception as exc:
        logger.error(f"MCP tool execution failed mcp_doris_get_catalog_list: {str(exc)}", exc_info=True)
        return _format_response(success=False, error=str(exc), message="Error getting catalog list")

View File

@@ -0,0 +1,455 @@
"""
Apache Doris MCP Prompts Manager
Provides standardized management of query templates and intelligent prompts
"""
from datetime import datetime
from typing import Any
from mcp.types import (
GetPromptResult,
Prompt,
PromptArgument,
PromptMessage,
TextContent,
)
from ..utils.db import DorisConnectionManager
class PromptTemplate:
    """A named prompt template with ``{placeholder}`` substitution.

    Attributes are plain data: ``name``, ``description``, the raw ``template``
    text, the declared ``arguments`` (empty list when none are given), a
    ``category`` label and the ``created_at`` timestamp taken at construction.
    """

    def __init__(
        self,
        name: str,
        description: str,
        template: str,
        arguments: list[PromptArgument] = None,
        category: str = "general",
    ):
        self.name = name
        self.description = description
        self.template = template
        self.arguments = arguments or []
        self.category = category
        self.created_at = datetime.now()

    def render(self, arguments: dict[str, Any]) -> str:
        """Return the template with every known ``{key}`` replaced by its value.

        Substitution happens in a single pass over the template, so text that
        comes from an argument value is never scanned again: a value such as
        ``"{region}"`` stays literal instead of being expanded by another
        argument. Placeholders without a matching key are left untouched.

        Args:
            arguments: Mapping of placeholder name to value; values are
                converted with ``str``. ``None`` or an empty mapping returns
                the template unchanged.

        Returns:
            The rendered text.
        """
        import re  # local import keeps this block self-contained

        values = {str(key): str(value) for key, value in (arguments or {}).items()}
        if not values:
            return self.template

        def _substitute(match):
            # Unknown placeholders are kept verbatim.
            return values.get(match.group(1), match.group(0))

        # A callable replacement avoids backslash/group-reference processing of
        # the substituted values.
        return re.sub(r"\{([^{}]*)\}", _substitute, self.template)
class DorisPromptsManager:
    """Apache Doris Prompts Manager.

    Holds the built-in prompt templates, validates and completes the arguments
    supplied for a prompt, renders the template, and appends a short summary
    of the current database obtained through the connection manager.
    """

    # Several templates contain a free-standing "filter line" placeholder whose
    # name differs from the argument that feeds it. Maps
    # argument name -> (placeholder name, label used to render the line).
    _DERIVED_PLACEHOLDERS = {
        "product_category": ("product_filter", "Product category"),
        "region": ("region_filter", "Sales region"),
        "behavior_type": ("behavior_filter", "Behavior type"),
        "table_name": ("table_scope", "Table scope"),
        "business_unit": ("business_scope", "Business scope"),
    }

    def __init__(self, connection_manager: DorisConnectionManager):
        self.connection_manager = connection_manager
        self.templates = self._init_prompt_templates()

    def _init_prompt_templates(self) -> dict[str, PromptTemplate]:
        """Build the built-in templates, keyed by template name."""
        templates = {}

        # Sales data analysis template
        templates["sales_analysis"] = PromptTemplate(
            name="sales_analysis",
            description="Sales data analysis query template for generating sales statistics and trend analysis queries",
            template="""Please help me analyze sales data with the following requirements:
Analysis time range: {date_range}
{product_filter}
{region_filter}
Please generate SQL queries to analyze the following dimensions:
1. Total sales amount and order quantity
2. Sales trends by time dimension
3. Top-selling product rankings
4. Sales personnel performance statistics
Data table structure reference:
- Order table: Contains order ID, customer ID, salesperson ID, order amount, order time and other fields
- Product table: Contains product ID, product name, product category, price and other fields
- Customer table: Contains customer ID, customer name, region and other fields
Please ensure query results are easy to understand and analyze.""",
            arguments=[
                PromptArgument(
                    name="date_range",
                    description="Date range for analysis, such as 'Q1 2024' or 'last 30 days'",
                    required=True,
                ),
                PromptArgument(
                    name="product_category",
                    description="Product category filter condition, such as 'electronics'",
                    required=False,
                ),
                PromptArgument(
                    name="region",
                    description="Sales region filter condition, such as 'East China'",
                    required=False,
                ),
            ],
            category="business_analysis",
        )

        # User behavior analysis template
        templates["user_behavior_analysis"] = PromptTemplate(
            name="user_behavior_analysis",
            description="User behavior analysis query template for analyzing user activity patterns and preferences",
            template="""Please help me analyze user behavior data, analysis objectives:
User segment: {user_segment}
{behavior_filter}
Analysis period: {time_period}
Please generate SQL queries to analyze the following aspects:
1. User activity statistics (DAU, MAU)
2. User behavior path analysis
3. Feature usage preference statistics
4. User retention rate analysis
Data table structure reference:
- User table: Contains user ID, registration time, user type, region and other fields
- Behavior log table: Contains user ID, behavior type, behavior time, page path and other fields
- Session table: Contains session ID, user ID, session start time, session duration and other fields
Please provide easy-to-understand statistical results and visualization suggestions.""",
            arguments=[
                PromptArgument(
                    name="user_segment",
                    description="User segment conditions, such as 'new users', 'active users'",
                    required=True,
                ),
                PromptArgument(
                    name="behavior_type",
                    description="Behavior type filter, such as 'login', 'purchase', 'browse'",
                    required=False,
                ),
                PromptArgument(
                    name="time_period",
                    description="Analysis time period, such as 'last 7 days', 'this month'",
                    required=False,
                ),
            ],
            category="user_analysis",
        )

        # Performance optimization analysis template
        templates["performance_optimization"] = PromptTemplate(
            name="performance_optimization",
            description="Database performance optimization analysis template for identifying performance bottlenecks and optimization opportunities",
            template="""Please help me with database performance analysis and optimization recommendations:
Focus area: {focus_area}
{table_scope}
Performance metrics: {metrics}
Please generate SQL queries to analyze the following content:
1. Table and query performance statistics
2. Index usage efficiency analysis
3. Slow query identification and analysis
4. Storage space usage
Analysis objectives:
- Identify performance bottlenecks
- Provide optimization recommendations
- Evaluate optimization effects
Please provide specific optimization recommendations and implementation steps.""",
            arguments=[
                PromptArgument(
                    name="focus_area",
                    description="Performance area of focus, such as 'query performance', 'storage optimization'",
                    required=True,
                ),
                PromptArgument(
                    name="table_name",
                    description="Specific table name (optional), if analyzing specific table performance",
                    required=False,
                ),
                PromptArgument(
                    name="metrics",
                    description="Performance metrics of interest, such as 'response time', 'throughput'",
                    required=False,
                ),
            ],
            category="performance",
        )

        # Data quality check template
        templates["data_quality_check"] = PromptTemplate(
            name="data_quality_check",
            description="Data quality check template for detecting data integrity and consistency issues",
            template="""Please help me perform data quality checks:
Check target: {target_table}
{quality_dimensions}
Check level: {check_level}
Please generate SQL queries to check the following data quality issues:
1. Data integrity (null values, duplicate values)
2. Data consistency (format, range)
3. Data accuracy (business rule validation)
4. Data timeliness (update frequency)
Check items:
- Required field null value checks
- Primary key and unique constraint validation
- Data format and type checks
- Business logic consistency validation
- Data distribution anomaly detection
Please provide detailed problem reports and fix recommendations.""",
            arguments=[
                PromptArgument(
                    name="target_table", description="Target table name to check", required=True
                ),
                PromptArgument(
                    name="quality_dimensions",
                    description="Quality check dimensions, such as 'integrity', 'consistency', 'accuracy'",
                    required=False,
                ),
                PromptArgument(
                    name="check_level",
                    description="Check level, such as 'basic check', 'deep check'",
                    required=False,
                ),
            ],
            category="data_quality",
        )

        # Report generation template
        templates["report_generation"] = PromptTemplate(
            name="report_generation",
            description="Business report generation template for creating standardized business reports",
            template="""Please help me generate business reports:
Report type: {report_type}
Report period: {report_period}
{business_scope}
Please generate SQL queries to build the following report content:
1. Key business indicator summary
2. Trend analysis and year-over-year/month-over-month comparison
3. Anomaly data identification and explanation
4. Business insights and recommendations
Report requirements:
- Data accuracy and timeliness
- Clear hierarchical structure
- Easy-to-understand data presentation
- Decision-supporting analytical perspective
Please provide complete report structure and data acquisition logic.""",
            arguments=[
                PromptArgument(
                    name="report_type",
                    description="Report type, such as 'sales report', 'operations report', 'financial report'",
                    required=True,
                ),
                PromptArgument(
                    name="report_period",
                    description="Report period, such as 'daily report', 'weekly report', 'monthly report'",
                    required=True,
                ),
                PromptArgument(
                    name="business_unit",
                    description="Business unit scope, such as 'East China region', 'Product line A'",
                    required=False,
                ),
            ],
            category="reporting",
        )

        # Real-time monitoring template
        templates["real_time_monitoring"] = PromptTemplate(
            name="real_time_monitoring",
            description="Real-time monitoring query template for building real-time data monitoring and alerting",
            template="""Please help me design real-time monitoring queries:
Monitoring target: {monitoring_target}
Alert threshold: {alert_threshold}
Monitoring frequency: {monitoring_frequency}
Please generate SQL queries to implement the following monitoring functions:
1. Real-time statistics of key indicators
2. Anomaly detection and alerting
3. Trend change monitoring
4. System health status checks
Monitoring dimensions:
- Business indicator monitoring (transaction volume, user activity, etc.)
- Technical indicator monitoring (performance, error rate, etc.)
- Data quality monitoring (integrity, consistency, etc.)
Please provide complete monitoring solution and implementation recommendations.""",
            arguments=[
                PromptArgument(
                    name="monitoring_target",
                    description="Monitoring target, such as 'transaction system', 'user activity'",
                    required=True,
                ),
                PromptArgument(
                    name="alert_threshold",
                    description="Alert threshold setting, such as 'error rate > 5%'",
                    required=False,
                ),
                PromptArgument(
                    name="monitoring_frequency",
                    description="Monitoring frequency, such as 'real-time', 'every minute', 'every 5 minutes'",
                    required=False,
                ),
            ],
            category="monitoring",
        )
        return templates

    async def list_prompts(self) -> list[Prompt]:
        """List all available prompt templates as MCP ``Prompt`` objects."""
        return [
            Prompt(
                name=template.name,
                description=template.description,
                arguments=template.arguments,
            )
            for template in self.templates.values()
        ]

    async def get_prompt(self, name: str, arguments: dict[str, Any]) -> GetPromptResult:
        """Render the named template and append database context.

        Args:
            name: Key of a registered template.
            arguments: Values for the template's declared arguments; may be
                ``None`` when the template has no required arguments.

        Returns:
            A ``GetPromptResult`` with a single user message.

        Raises:
            ValueError: If the template is unknown or a required argument is
                missing.
        """
        if name not in self.templates:
            raise ValueError(f"Prompt template named '{name}' not found")
        template = self.templates[name]

        # Validate required arguments and fill defaults for optional ones.
        processed_args = await self._process_arguments(template, arguments)
        rendered_content = template.render(processed_args)

        # Append a summary of the current database so the model sees real
        # table names; on failure this is an explanatory string, not an error.
        context_info = await self._get_database_context()
        full_content = f"""{rendered_content}
Database context information:
{context_info}
Please generate accurate and efficient SQL queries based on the above requirements and database structure."""
        return GetPromptResult(
            description=template.description,
            messages=[
                PromptMessage(
                    role="user", content=TextContent(type="text", text=full_content)
                )
            ],
        )

    async def _process_arguments(
        self, template: PromptTemplate, arguments: dict[str, Any]
    ) -> dict[str, Any]:
        """Resolve the values used to render ``template``.

        For every declared argument the supplied value is used if present;
        otherwise a required argument raises and an optional one receives its
        default text. Arguments listed in ``_DERIVED_PLACEHOLDERS``
        additionally populate the filter-line placeholder the templates
        actually contain (for example ``product_category`` fills
        ``{product_filter}``), rendered as ``"<label>: <value>"`` or as an
        empty string when there is no value.

        Args:
            template: Template whose ``arguments`` declare the accepted names.
            arguments: Caller-supplied values; ``None`` is treated as empty.

        Returns:
            Mapping of placeholder name to value.

        Raises:
            ValueError: If a required argument is missing.
        """
        supplied = arguments or {}
        processed = {}
        for arg in template.arguments:
            if arg.name in supplied:
                value = supplied[arg.name]
            elif arg.required:
                raise ValueError(f"Missing required parameter: {arg.name}")
            else:
                # Provide default handling for optional parameters
                value = self._get_default_argument_text(arg.name)
            processed[arg.name] = value

            derived = self._DERIVED_PLACEHOLDERS.get(arg.name)
            if derived is not None:
                placeholder, label = derived
                text = "" if value is None else str(value)
                processed[placeholder] = f"{label}: {text}" if text else ""
        return processed

    def _get_default_argument_text(self, arg_name: str) -> str:
        """Return the default text for an optional argument ("" if unknown)."""
        defaults = {
            "product_category": "",
            "region": "",
            "behavior_type": "",
            "time_period": "No time range restriction",
            "table_name": "",
            "metrics": "All performance metrics",
            "quality_dimensions": "All quality dimensions",
            "check_level": "Standard check",
            "business_unit": "Full business scope",
            "alert_threshold": "Use default threshold",
            "monitoring_frequency": "Real-time monitoring",
        }
        return defaults.get(arg_name, "")

    async def _get_database_context(self) -> str:
        """Summarise the current database for inclusion in a prompt.

        Queries ``information_schema.tables`` for the table count, the total
        row count and the ten tables with the most rows.

        Returns:
            A multi-line summary, or a single line explaining the failure if
            any step raises (this method never propagates exceptions).
        """
        try:
            connection = await self.connection_manager.get_connection("system")

            # Get basic database information
            db_info_sql = """
SELECT
COUNT(*) as table_count,
SUM(table_rows) as total_rows
FROM information_schema.tables
WHERE table_schema = DATABASE()
AND table_type = 'BASE TABLE'
"""
            db_result = await connection.execute(db_info_sql)
            db_info = db_result.data[0] if db_result.data else {}

            # Get main table list
            tables_sql = """
SELECT
table_name,
table_comment,
table_rows
FROM information_schema.tables
WHERE table_schema = DATABASE()
AND table_type = 'BASE TABLE'
ORDER BY table_rows DESC
LIMIT 10
"""
            tables_result = await connection.execute(tables_sql)

            # SUM() over zero rows and table_rows itself can be NULL; `or 0`
            # keeps the ":," format spec from raising on None.
            table_count = db_info.get("table_count") or 0
            total_rows = db_info.get("total_rows") or 0
            lines = [
                "Current database statistics:",
                f"- Total number of tables: {table_count}",
                f"- Total data rows: {total_rows:,}",
                "Main data tables:",
            ]
            for table in tables_result.data:
                entry = f"- {table['table_name']}"
                if table.get("table_comment"):
                    entry += f": {table['table_comment']}"
                entry += f" ({(table.get('table_rows') or 0):,} rows)"
                lines.append(entry)
            return "\n".join(lines)
        except Exception as e:
            return f"Unable to get database context information: {str(e)}"

    def get_templates_by_category(self, category: str) -> list[PromptTemplate]:
        """Return the templates whose ``category`` equals ``category``."""
        return [
            template
            for template in self.templates.values()
            if template.category == category
        ]

    def get_all_categories(self) -> list[str]:
        """Return the distinct template categories, sorted."""
        categories = {template.category for template in self.templates.values()}
        return sorted(categories)

View File

@@ -0,0 +1,361 @@
"""
Apache Doris MCP Resources Manager
Provides standardized abstraction and access interface for database metadata
"""
import json
from datetime import datetime
from typing import Any
from mcp.types import Resource
from ..utils.db import DorisConnectionManager
class TableMetadata:
    """Plain value holder describing one data table.

    Stores the table name, optional comment, row count, column descriptions
    and creation time exactly as given; a falsy ``columns`` argument becomes a
    new empty list.
    """

    def __init__(
        self,
        name: str,
        comment: str = None,
        row_count: int = 0,
        columns: list[dict] = None,
        create_time: datetime = None,
    ):
        self.name, self.comment = name, comment
        self.row_count = row_count
        # Never share a default list between instances.
        self.columns = columns if columns else []
        self.create_time = create_time
class ViewMetadata:
    """Plain value holder describing one view: name, comment and definition."""

    def __init__(self, name: str, comment: str = None, definition: str = None):
        self.name, self.comment, self.definition = name, comment, definition
class MetadataCache:
"""Metadata cache manager"""
def __init__(self, ttl_seconds: int = 300):
self.cache = {}
self.ttl = ttl_seconds
async def get(self, key: str) -> Any | None:
if key in self.cache:
data, timestamp = self.cache[key]
if datetime.now().timestamp() - timestamp < self.ttl:
return data
else:
del self.cache[key]
return None
async def set(self, key: str, value: Any):
self.cache[key] = (value, datetime.now().timestamp())
class DorisResourcesManager:
"""Apache Doris Resources Manager"""
def __init__(self, connection_manager: DorisConnectionManager):
    """Keep the connection manager and create a fresh metadata cache."""
    self.metadata_cache = MetadataCache()
    self.connection_manager = connection_manager
async def list_resources(self) -> list[Resource]:
"""List all available database resources"""
resources = []
try:
# Get metadata for all tables
tables = await self._get_table_metadata()
for table in tables:
resources.append(
Resource(
uri=f"doris://table/{table.name}",
name=f"Data Table: {table.name}",
description=f"{table.comment or 'Data table'} (rows: {table.row_count:,})",
mimeType="application/json",
)
)
# Get metadata for all views
views = await self._get_view_metadata()
for view in views:
resources.append(
Resource(
uri=f"doris://view/{view.name}",
name=f"Data View: {view.name}",
description=f"{view.comment or 'Data view'}",
mimeType="application/json",
)
)
# Add database statistics resource
resources.append(
Resource(
uri="doris://stats/database",
name="Database Statistics",
description="Overall database statistics and performance metrics",
mimeType="application/json",
)
)
except Exception as e:
print(f"Failed to get resource list: {e}")
return resources
async def read_resource(self, uri: str) -> str:
"""Read detailed information of specific resource"""
try:
resource_type, resource_name = self._parse_resource_uri(uri)
if resource_type == "table":
return await self._get_table_schema(resource_name)
elif resource_type == "view":
return await self._get_view_definition(resource_name)
elif resource_type == "stats" and resource_name == "database":
return await self._get_database_stats()
else:
raise ValueError(f"Unsupported resource type: {resource_type}")
except Exception as e:
return json.dumps(
{"error": f"Failed to read resource: {str(e)}", "uri": uri},
ensure_ascii=False,
indent=2,
)
async def _get_table_metadata(self) -> list[TableMetadata]:
"""Get metadata for all tables"""
cache_key = "table_metadata"
cached = await self.metadata_cache.get(cache_key)
if cached:
return cached
connection = await self.connection_manager.get_connection("system")
# Query basic table information
tables_query = """
SELECT
table_name,
table_comment,
table_rows as row_count,
create_time
FROM information_schema.tables
WHERE table_schema = DATABASE()
AND table_type = 'BASE TABLE'
ORDER BY table_name
"""
result = await connection.execute(tables_query)
tables = []
for row in result.data:
# Get column information for the table
columns = await self._get_table_columns(connection, row["table_name"])
table = TableMetadata(
name=row["table_name"],
comment=row.get("table_comment"),
row_count=row.get("row_count", 0),
columns=columns,
create_time=row.get("create_time"),
)
tables.append(table)
await self.metadata_cache.set(cache_key, tables)
return tables
async def _get_table_columns(self, connection, table_name: str) -> list[dict]:
"""Get column information for table"""
columns_query = """
SELECT
column_name,
data_type,
is_nullable,
column_default,
column_comment,
column_key
FROM information_schema.columns
WHERE table_schema = DATABASE()
AND table_name = %s
ORDER BY ordinal_position
"""
result = await connection.execute(columns_query, (table_name,))
return [dict(row) for row in result.data]
async def _get_view_metadata(self) -> list[ViewMetadata]:
"""Get metadata for all views"""
cache_key = "view_metadata"
cached = await self.metadata_cache.get(cache_key)
if cached:
return cached
connection = await self.connection_manager.get_connection("system")
views_query = """
SELECT
table_name,
table_comment,
view_definition
FROM information_schema.views
WHERE table_schema = DATABASE()
ORDER BY table_name
"""
result = await connection.execute(views_query)
views = []
for row in result.data:
view = ViewMetadata(
name=row["table_name"],
comment=row.get("table_comment"),
definition=row.get("view_definition"),
)
views.append(view)
await self.metadata_cache.set(cache_key, views)
return views
async def _get_table_schema(self, table_name: str) -> str:
"""Get detailed structure information of table"""
connection = await self.connection_manager.get_connection("system")
# Get basic table information
table_info_query = """
SELECT
table_name,
table_comment,
table_rows,
create_time,
engine
FROM information_schema.tables
WHERE table_schema = DATABASE()
AND table_name = %s
"""
table_result = await connection.execute(table_info_query, (table_name,))
if not table_result.data:
raise ValueError(f"Table {table_name} does not exist")
table_info = table_result.data[0]
# Get column information
columns = await self._get_table_columns(connection, table_name)
# Get index information
indexes = await self._get_table_indexes(connection, table_name)
schema_info = {
"table_name": table_info["table_name"],
"comment": table_info.get("table_comment"),
"row_count": table_info.get("table_rows", 0),
"create_time": str(table_info.get("create_time")),
"engine": table_info.get("engine"),
"columns": columns,
"indexes": indexes,
}
return json.dumps(schema_info, ensure_ascii=False, indent=2)
async def _get_table_indexes(self, connection, table_name: str) -> list[dict]:
"""Get index information for table"""
indexes_query = """
SELECT
index_name,
column_name,
index_type,
non_unique
FROM information_schema.statistics
WHERE table_schema = DATABASE()
AND table_name = %s
ORDER BY index_name, seq_in_index
"""
result = await connection.execute(indexes_query, (table_name,))
return [dict(row) for row in result.data]
async def _get_view_definition(self, view_name: str) -> str:
"""Get definition information of view"""
connection = await self.connection_manager.get_connection("system")
view_query = """
SELECT
table_name,
table_comment,
view_definition
FROM information_schema.views
WHERE table_schema = DATABASE()
AND table_name = %s
"""
result = await connection.execute(view_query, (view_name,))
if not result.data:
raise ValueError(f"View {view_name} does not exist")
view_info = result.data[0]
schema_info = {
"view_name": view_info["table_name"],
"comment": view_info.get("table_comment"),
"definition": view_info.get("view_definition"),
}
return json.dumps(schema_info, ensure_ascii=False, indent=2)
async def _get_database_stats(self) -> str:
"""Get database statistics"""
connection = await self.connection_manager.get_connection("system")
# Get table statistics
table_stats_query = """
SELECT
COUNT(*) as table_count,
SUM(table_rows) as total_rows
FROM information_schema.tables
WHERE table_schema = DATABASE()
AND table_type = 'BASE TABLE'
"""
table_result = await connection.execute(table_stats_query)
table_stats = table_result.data[0] if table_result.data else {}
# Get view statistics
view_stats_query = """
SELECT COUNT(*) as view_count
FROM information_schema.views
WHERE table_schema = DATABASE()
"""
view_result = await connection.execute(view_stats_query)
view_stats = view_result.data[0] if view_result.data else {}
stats_info = {
"database_name": "current_database",
"table_count": table_stats.get("table_count", 0),
"view_count": view_stats.get("view_count", 0),
"total_rows": table_stats.get("total_rows", 0),
"last_updated": datetime.now().isoformat(),
}
return json.dumps(stats_info, ensure_ascii=False, indent=2)
def _parse_resource_uri(self, uri: str) -> tuple:
"""Parse resource URI"""
if not uri.startswith("doris://"):
raise ValueError("Invalid resource URI format")
path = uri[8:] # Remove "doris://" prefix
parts = path.split("/")
if len(parts) < 2:
raise ValueError("Incomplete resource URI format")
return parts[0], parts[1]

View File

@@ -1,157 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Tool Initialization Module
Centralized initialization of all tools, ensuring they are correctly registered with MCP
"""
import logging
import os
from typing import List, Dict, Any, Optional
import json
from datetime import datetime
import traceback
# Import Context
from mcp.server.fastmcp import Context
# Import doris mcp tools
from doris_mcp_server.tools.mcp_doris_tools import (
mcp_doris_exec_query,
mcp_doris_get_table_schema,
mcp_doris_get_db_table_list,
mcp_doris_get_db_list,
mcp_doris_get_table_comment,
mcp_doris_get_table_column_comments,
mcp_doris_get_table_indexes,
mcp_doris_get_recent_audit_logs,
mcp_doris_get_catalog_list
)
# Get logger
logger = logging.getLogger("doris-mcp-tools-initializer")
async def register_mcp_tools(mcp):
    """Register the Doris tool wrappers on an MCP server object.

    Every wrapper is attached through ``mcp.tool(name, description=...)`` and
    forwards its keyword arguments to the matching ``mcp_doris_*``
    implementation. The table-scoped wrappers return a JSON error payload
    without delegating when ``table_name`` is falsy, and the audit-log
    wrapper coerces ``days`` and ``limit`` to ``int`` first.

    NOTE(review): every description lists a required ``random_string``
    parameter, yet none of the wrapper signatures declares it.

    Args:
        mcp: Server object providing the ``tool`` decorator factory and an
            awaitable ``list_tools()`` (presumably a FastMCP instance;
            confirm against the caller).

    Returns:
        True when registration and the final tool count complete, False when
        any exception was raised (the message and traceback are logged).
    """
    logger.info("Starting to register MCP tools...")

    def missing_table_name() -> Dict[str, Any]:
        # Error payload shared by the wrappers that require table_name.
        failure = json.dumps({"success": False, "error": "Missing table_name parameter"})
        return {"content": [{"type": "text", "text": failure}]}

    try:
        # --- exec_query: run arbitrary SQL ---
        @mcp.tool("exec_query", description="""[Function Description]: Execute SQL query and return result command with catalog federation support.\n
[Parameter Content]:\n
- random_string (string) [Required] - Unique identifier for the tool call\n
- sql (string) [Required] - SQL statement to execute. MUST use three-part naming for all table references: 'catalog_name.db_name.table_name'. For internal tables use 'internal.db_name.table_name', for external tables use 'catalog_name.db_name.table_name'\n
- db_name (string) [Optional] - Target database name, defaults to the current database\n
- catalog_name (string) [Optional] - Reference catalog name for context, defaults to current catalog\n
- max_rows (integer) [Optional] - Maximum number of rows to return, default 100
- timeout (integer) [Optional] - Query timeout in seconds, default 30""")
        async def exec_query_tool(sql: str, db_name: str = None, catalog_name: str = None, max_rows: int = 100, timeout: int = 30) -> Dict[str, Any]:
            """Wrapper: Execute SQL query and return result command"""
            # Arguments arrive as named parameters, so no context object is needed.
            return await mcp_doris_exec_query(
                sql=sql,
                db_name=db_name,
                catalog_name=catalog_name,
                max_rows=max_rows,
                timeout=timeout,
            )

        # --- get_table_schema ---
        @mcp.tool("get_table_schema", description="""[Function Description]: Get detailed structure information of the specified table (columns, types, comments, etc.).\n
[Parameter Content]:\n
- random_string (string) [Required] - Unique identifier for the tool call\n
- table_name (string) [Required] - Name of the table to query\n
- db_name (string) [Optional] - Target database name, defaults to the current database\n
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog\n""")
        async def get_table_schema_tool(table_name: str, db_name: str = None, catalog_name: str = None) -> Dict[str, Any]:
            """Wrapper: Get table schema"""
            if not table_name:
                return missing_table_name()
            return await mcp_doris_get_table_schema(
                table_name=table_name, db_name=db_name, catalog_name=catalog_name
            )

        # --- get_db_table_list ---
        @mcp.tool("get_db_table_list", description="""[Function Description]: Get a list of all table names in the specified database.\n
[Parameter Content]:\n
- random_string (string) [Required] - Unique identifier for the tool call\n
- db_name (string) [Optional] - Target database name, defaults to the current database\n
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog\n""")
        async def get_db_table_list_tool(db_name: str = None, catalog_name: str = None) -> Dict[str, Any]:
            """Wrapper: Get database table list"""
            return await mcp_doris_get_db_table_list(db_name=db_name, catalog_name=catalog_name)

        # --- get_db_list: the wrapper takes only the optional catalog_name ---
        @mcp.tool("get_db_list", description="""[Function Description]: Get a list of all database names on the server.\n
[Parameter Content]:\n
- random_string (string) [Required] - Unique identifier for the tool call\n
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog\n""")
        async def get_db_list_tool(catalog_name: str = None) -> Dict[str, Any]:
            """Wrapper: Get database list"""
            return await mcp_doris_get_db_list(catalog_name=catalog_name)

        # --- get_table_comment ---
        @mcp.tool("get_table_comment", description="""[Function Description]: Get the comment information for the specified table.\n
[Parameter Content]:\n
- random_string (string) [Required] - Unique identifier for the tool call\n
- table_name (string) [Required] - Name of the table to query\n
- db_name (string) [Optional] - Target database name, defaults to the current database\n
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog\n""")
        async def get_table_comment_tool(table_name: str, db_name: str = None, catalog_name: str = None) -> Dict[str, Any]:
            """Wrapper: Get table comment"""
            if not table_name:
                return missing_table_name()
            return await mcp_doris_get_table_comment(
                table_name=table_name, db_name=db_name, catalog_name=catalog_name
            )

        # --- get_table_column_comments ---
        @mcp.tool("get_table_column_comments", description="""[Function Description]: Get comment information for all columns in the specified table.\n
[Parameter Content]:\n
- random_string (string) [Required] - Unique identifier for the tool call\n
- table_name (string) [Required] - Name of the table to query\n
- db_name (string) [Optional] - Target database name, defaults to the current database\n
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog\n""")
        async def get_table_column_comments_tool(table_name: str, db_name: str = None, catalog_name: str = None) -> Dict[str, Any]:
            """Wrapper: Get table column comments"""
            if not table_name:
                return missing_table_name()
            return await mcp_doris_get_table_column_comments(
                table_name=table_name, db_name=db_name, catalog_name=catalog_name
            )

        # --- get_table_indexes ---
        @mcp.tool("get_table_indexes", description="""[Function Description]: Get index information for the specified table.\n
[Parameter Content]:\n
- random_string (string) [Required] - Unique identifier for the tool call\n
- table_name (string) [Required] - Name of the table to query\n
- db_name (string) [Optional] - Target database name, defaults to the current database\n
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog\n""")
        async def get_table_indexes_tool(table_name: str, db_name: str = None, catalog_name: str = None) -> Dict[str, Any]:
            """Wrapper: Get table indexes"""
            if not table_name:
                return missing_table_name()
            return await mcp_doris_get_table_indexes(
                table_name=table_name, db_name=db_name, catalog_name=catalog_name
            )

        # --- get_recent_audit_logs ---
        @mcp.tool("get_recent_audit_logs", description="""[Function Description]: Get audit log records for a recent period.\n
[Parameter Content]:\n
- random_string (string) [Required] - Unique identifier for the tool call\n
- days (integer) [Optional] - Number of recent days of logs to retrieve, default is 7\n
- limit (integer) [Optional] - Maximum number of records to return, default is 100\n""")
        async def get_recent_audit_logs_tool(days: int = 7, limit: int = 100) -> Dict[str, Any]:
            """Wrapper: Get recent audit logs"""
            try:
                # Accept numeric strings as well as ints.
                days, limit = int(days), int(limit)
            except (ValueError, TypeError):
                failure = json.dumps({"success": False, "error": "days and limit parameters must be integers"})
                return {"content": [{"type": "text", "text": failure}]}
            return await mcp_doris_get_recent_audit_logs(days=days, limit=limit)

        # --- get_catalog_list: takes no arguments ---
        @mcp.tool("get_catalog_list", description="""[Function Description]: Get a list of all catalog names on the server.\n
[Parameter Content]:\n
- random_string (string) [Required] - Unique identifier for the tool call\n""")
        async def get_catalog_list_tool() -> Dict[str, Any]:
            """Wrapper: Get catalog list"""
            return await mcp_doris_get_catalog_list()

        # Ask the server how many tools it now knows about, for the log line.
        registered = await mcp.list_tools()
        logger.info(f"Registered all MCP tools, total {len(registered)} tools")
    except Exception as e:
        logger.error(f"Error registering MCP tools: {str(e)}")
        logger.error(traceback.format_exc())
        return False
    else:
        return True

View File

@@ -0,0 +1,766 @@
"""
Apache Doris MCP Tools Manager
Responsible for tool registration, management, scheduling and routing, does not contain specific business logic implementation
"""
import json
import time
from datetime import datetime
from typing import Any, Dict, List
from mcp.types import Tool
from ..utils.db import DorisConnectionManager
from ..utils.query_executor import DorisQueryExecutor
from ..utils.analysis_tools import TableAnalyzer, PerformanceMonitor
from ..utils.schema_extractor import MetadataExtractor
from ..utils.logger import get_logger
logger = get_logger(__name__)
class DorisToolsManager:
"""Apache Doris Tools Manager"""
def __init__(self, connection_manager: DorisConnectionManager) -> None:
    """Store the connection manager and build the processors that tools delegate to.

    Args:
        connection_manager: Connection source handed to every processor
            created here.
    """
    self.connection_manager = connection_manager
    # Initialize business logic processors; each one receives the same
    # connection manager.
    self.query_executor = DorisQueryExecutor(connection_manager)
    self.table_analyzer = TableAnalyzer(connection_manager)
    self.performance_monitor = PerformanceMonitor(connection_manager)
    # MetadataExtractor is the only processor given the manager by keyword.
    self.metadata_extractor = MetadataExtractor(connection_manager=connection_manager)
    logger.info("DorisToolsManager initialized with business logic processors")
async def register_tools_with_mcp(self, mcp):
    """Attach one wrapper per tool to an MCP server object.

    Each wrapper is registered through ``mcp.tool(...)`` and does nothing but
    pack its keyword arguments into a dict and pass them to
    ``self.call_tool`` under the tool's routing name.

    NOTE(review): ``performance_stats`` is registered here under the name
    ``"performance_stats[Experimental]"`` while its wrapper routes to
    ``"performance_stats"``.

    Args:
        mcp: Server object providing a ``tool`` decorator factory that
            accepts a name, ``description`` and, for the first two tools,
            ``inputSchema`` (presumably a FastMCP-style server; confirm that
            its ``tool()`` accepts ``inputSchema``).
    """
    logger.info("Starting to register MCP tools")

    # Explicit input schemas for the two analysis tools.
    column_analysis_schema = {
        "type": "object",
        "properties": {
            "table_name": {"type": "string", "description": "Table name"},
            "column_name": {
                "type": "string",
                "description": "Column name to analyze",
            },
            "analysis_type": {
                "type": "string",
                "enum": ["basic", "distribution", "detailed"],
                "description": "Analysis type",
                "default": "basic",
            },
        },
        "required": ["table_name", "column_name"],
    }
    performance_stats_schema = {
        "type": "object",
        "properties": {
            "metric_type": {
                "type": "string",
                "enum": ["queries", "connections", "tables", "system"],
                "description": "Performance metric type",
                "default": "queries",
            },
            "time_range": {
                "type": "string",
                "enum": ["1h", "6h", "24h", "7d"],
                "description": "Time range",
                "default": "1h",
            },
        },
    }

    # --- column_analysis ---
    @mcp.tool(
        "column_analysis",
        description="""[Function Description]: Analyze statistical information and data distribution of the specified column.
[Parameter Content]:
- table_name (string) [Required] - Name of the table to analyze
- column_name (string) [Required] - Name of the column to analyze
- analysis_type (string) [Optional] - Type of analysis to perform, default is "basic"
* "basic": Basic statistics (count, null values, distinct values)
* "distribution": Data distribution analysis (frequency, percentiles)
* "detailed": Comprehensive analysis including all above plus patterns and outliers
""",
        inputSchema=column_analysis_schema,
    )
    async def column_analysis_tool(
        table_name: str,
        column_name: str,
        analysis_type: str = "basic"
    ) -> str:
        """Column statistical analysis tool"""
        arguments = {
            "table_name": table_name,
            "column_name": column_name,
            "analysis_type": analysis_type,
        }
        return await self.call_tool("column_analysis", arguments)

    # --- performance_stats (registered with an "[Experimental]" suffix) ---
    @mcp.tool(
        "performance_stats[Experimental]",
        description="""[Important]: This tool is experimental and may not be fully functional!
[Function Description]: Get database performance statistics information.
[Parameter Content]:
- metric_type (string) [Optional] - Type of performance metrics to retrieve, default is "queries"
* "queries": Query performance metrics (execution time, frequency, etc.)
* "connections": Connection statistics (active connections, connection pool status)
* "tables": Table-level statistics (size, row count, access patterns)
* "system": System-level metrics (CPU, memory, disk usage)
- time_range (string) [Optional] - Time range for statistics, default is "1h"
* "1h": Last 1 hour
* "6h": Last 6 hours
* "24h": Last 24 hours
* "7d": Last 7 days
""",
        inputSchema=performance_stats_schema,
    )
    async def performance_stats_tool(
        metric_type: str = "queries",
        time_range: str = "1h"
    ) -> str:
        """Database performance monitoring tool"""
        arguments = {"metric_type": metric_type, "time_range": time_range}
        return await self.call_tool("performance_stats", arguments)

    # --- exec_query ---
    @mcp.tool(
        "exec_query",
        description="""[Function Description]: Execute SQL query and return result command with catalog federation support.
[Parameter Content]:
- sql (string) [Required] - SQL statement to execute. MUST use three-part naming for all table references: 'catalog_name.db_name.table_name'. For internal tables use 'internal.db_name.table_name', for external tables use 'catalog_name.db_name.table_name'
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Reference catalog name for context, defaults to current catalog
- max_rows (integer) [Optional] - Maximum number of rows to return, default 100
- timeout (integer) [Optional] - Query timeout in seconds, default 30
""",
    )
    async def exec_query_tool(
        sql: str,
        db_name: str = None,
        catalog_name: str = None,
        max_rows: int = 100,
        timeout: int = 30,
    ) -> str:
        """Execute SQL query (supports federation queries)"""
        arguments = {
            "sql": sql,
            "db_name": db_name,
            "catalog_name": catalog_name,
            "max_rows": max_rows,
            "timeout": timeout,
        }
        return await self.call_tool("exec_query", arguments)

    # --- get_table_schema ---
    @mcp.tool(
        "get_table_schema",
        description="""[Function Description]: Get detailed structure information of the specified table (columns, types, comments, etc.).
[Parameter Content]:
- table_name (string) [Required] - Name of the table to query
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
    )
    async def get_table_schema_tool(
        table_name: str, db_name: str = None, catalog_name: str = None
    ) -> str:
        """Get table schema information"""
        arguments = {
            "table_name": table_name,
            "db_name": db_name,
            "catalog_name": catalog_name,
        }
        return await self.call_tool("get_table_schema", arguments)

    # --- get_db_table_list ---
    @mcp.tool(
        "get_db_table_list",
        description="""[Function Description]: Get a list of all table names in the specified database.
[Parameter Content]:
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
    )
    async def get_db_table_list_tool(
        db_name: str = None, catalog_name: str = None
    ) -> str:
        """Get database table list"""
        arguments = {"db_name": db_name, "catalog_name": catalog_name}
        return await self.call_tool("get_db_table_list", arguments)

    # --- get_db_list ---
    @mcp.tool(
        "get_db_list",
        description="""[Function Description]: Get a list of all database names on the server.
[Parameter Content]:
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
    )
    async def get_db_list_tool(catalog_name: str = None) -> str:
        """Get database list"""
        arguments = {"catalog_name": catalog_name}
        return await self.call_tool("get_db_list", arguments)

    # --- get_table_comment ---
    @mcp.tool(
        "get_table_comment",
        description="""[Function Description]: Get the comment information for the specified table.
[Parameter Content]:
- table_name (string) [Required] - Name of the table to query
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
    )
    async def get_table_comment_tool(
        table_name: str, db_name: str = None, catalog_name: str = None
    ) -> str:
        """Get table comment"""
        arguments = {
            "table_name": table_name,
            "db_name": db_name,
            "catalog_name": catalog_name,
        }
        return await self.call_tool("get_table_comment", arguments)

    # --- get_table_column_comments ---
    @mcp.tool(
        "get_table_column_comments",
        description="""[Function Description]: Get comment information for all columns in the specified table.
[Parameter Content]:
- table_name (string) [Required] - Name of the table to query
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
    )
    async def get_table_column_comments_tool(
        table_name: str, db_name: str = None, catalog_name: str = None
    ) -> str:
        """Get table column comments"""
        arguments = {
            "table_name": table_name,
            "db_name": db_name,
            "catalog_name": catalog_name,
        }
        return await self.call_tool("get_table_column_comments", arguments)

    # --- get_table_indexes ---
    @mcp.tool(
        "get_table_indexes",
        description="""[Function Description]: Get index information for the specified table.
[Parameter Content]:
- table_name (string) [Required] - Name of the table to query
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
    )
    async def get_table_indexes_tool(
        table_name: str, db_name: str = None, catalog_name: str = None
    ) -> str:
        """Get table indexes"""
        arguments = {
            "table_name": table_name,
            "db_name": db_name,
            "catalog_name": catalog_name,
        }
        return await self.call_tool("get_table_indexes", arguments)

    # --- get_recent_audit_logs ---
    @mcp.tool(
        "get_recent_audit_logs",
        description="""[Function Description]: Get audit log records for a recent period.
[Parameter Content]:
- days (integer) [Optional] - Number of recent days of logs to retrieve, default is 7
- limit (integer) [Optional] - Maximum number of records to return, default is 100
""",
    )
    async def get_recent_audit_logs_tool(
        days: int = 7, limit: int = 100
    ) -> str:
        """Get audit logs"""
        arguments = {"days": days, "limit": limit}
        return await self.call_tool("get_recent_audit_logs", arguments)

    # --- get_catalog_list: random_string is its only (required) argument ---
    @mcp.tool(
        "get_catalog_list",
        description="""[Function Description]: Get a list of all catalog names on the server.
[Parameter Content]:
- random_string (string) [Required] - Unique identifier for the tool call
""",
    )
    async def get_catalog_list_tool(random_string: str) -> str:
        """Get catalog list"""
        arguments = {"random_string": random_string}
        return await self.call_tool("get_catalog_list", arguments)

    logger.info("Successfully registered 11 tools to MCP server (2 core tools + 9 migrated tools)")
async def list_tools(self) -> List[Tool]:
    """Build the descriptors of every tool this manager can route.

    The original note on this method says the listing serves stdio mode.
    Each descriptor carries the tool name, its human-readable description
    and a JSON-schema ``inputSchema``.

    NOTE(review): ``column_analysis`` is listed here under the name
    ``"column_analysis[Experimental]"``.

    Returns:
        A new list of ``Tool`` objects, in a fixed order.
    """

    def text_prop(description: str) -> Dict[str, Any]:
        # Schema fragment for a free-form string argument.
        return {"type": "string", "description": description}

    def choice_prop(options: List[str], description: str, default: str) -> Dict[str, Any]:
        # Schema fragment for a string argument limited to fixed options.
        return {
            "type": "string",
            "enum": options,
            "description": description,
            "default": default,
        }

    def int_prop(description: str, default: int) -> Dict[str, Any]:
        # Schema fragment for an integer argument with a default.
        return {"type": "integer", "description": description, "default": default}

    def table_scoped_schema() -> Dict[str, Any]:
        # Fresh schema for tools addressed by table / database / catalog.
        return {
            "type": "object",
            "properties": {
                "table_name": text_prop("Table name"),
                "db_name": text_prop("Database name"),
                "catalog_name": text_prop("Catalog name"),
            },
            "required": ["table_name"],
        }

    return [
        Tool(
            name="column_analysis[Experimental]",
            description="""[Important]: This tool is experimental and may not be fully functional!
[Function Description]: Analyze statistical information and data distribution of the specified column.
[Parameter Content]:
- table_name (string) [Required] - Name of the table to analyze
- column_name (string) [Required] - Name of the column to analyze
- analysis_type (string) [Optional] - Type of analysis to perform, default is "basic"
* "basic": Basic statistics (count, null values, distinct values)
* "distribution": Data distribution analysis (frequency, percentiles)
* "detailed": Comprehensive analysis including all above plus patterns and outliers
""",
            inputSchema={
                "type": "object",
                "properties": {
                    "table_name": text_prop("Table name"),
                    "column_name": text_prop("Column name to analyze"),
                    "analysis_type": choice_prop(
                        ["basic", "distribution", "detailed"], "Analysis type", "basic"
                    ),
                },
                "required": ["table_name", "column_name"],
            },
        ),
        Tool(
            name="performance_stats",
            description="""[Function Description]: Get database performance statistics information.
[Parameter Content]:
- metric_type (string) [Optional] - Type of performance metrics to retrieve, default is "queries"
* "queries": Query performance metrics (execution time, frequency, etc.)
* "connections": Connection statistics (active connections, connection pool status)
* "tables": Table-level statistics (size, row count, access patterns)
* "system": System-level metrics (CPU, memory, disk usage)
- time_range (string) [Optional] - Time range for statistics, default is "1h"
* "1h": Last 1 hour
* "6h": Last 6 hours
* "24h": Last 24 hours
* "7d": Last 7 days
""",
            inputSchema={
                "type": "object",
                "properties": {
                    "metric_type": choice_prop(
                        ["queries", "connections", "tables", "system"],
                        "Performance metric type",
                        "queries",
                    ),
                    "time_range": choice_prop(
                        ["1h", "6h", "24h", "7d"], "Time range", "1h"
                    ),
                },
            },
        ),
        Tool(
            name="exec_query",
            description="""[Function Description]: Execute SQL query and return result command with catalog federation support.
[Parameter Content]:
- sql (string) [Required] - SQL statement to execute. MUST use three-part naming for all table references: 'catalog_name.db_name.table_name'. For internal tables use 'internal.db_name.table_name', for external tables use 'catalog_name.db_name.table_name'
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Reference catalog name for context, defaults to current catalog
- max_rows (integer) [Optional] - Maximum number of rows to return, default 100
- timeout (integer) [Optional] - Query timeout in seconds, default 30
""",
            inputSchema={
                "type": "object",
                "properties": {
                    "sql": text_prop("SQL statement to execute, must use three-part naming"),
                    "db_name": text_prop("Target database name"),
                    "catalog_name": text_prop("Catalog name"),
                    "max_rows": int_prop("Maximum number of rows to return", 100),
                    "timeout": int_prop("Timeout in seconds", 30),
                },
                "required": ["sql"],
            },
        ),
        Tool(
            name="get_table_schema",
            description="""[Function Description]: Get detailed structure information of the specified table (columns, types, comments, etc.).
[Parameter Content]:
- table_name (string) [Required] - Name of the table to query
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
            inputSchema=table_scoped_schema(),
        ),
        Tool(
            name="get_db_table_list",
            description="""[Function Description]: Get a list of all table names in the specified database.
[Parameter Content]:
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
            inputSchema={
                "type": "object",
                "properties": {
                    "db_name": text_prop("Database name"),
                    "catalog_name": text_prop("Catalog name"),
                },
            },
        ),
        Tool(
            name="get_db_list",
            description="""[Function Description]: Get a list of all database names on the server.
[Parameter Content]:
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
            inputSchema={
                "type": "object",
                "properties": {
                    "catalog_name": text_prop("Catalog name"),
                },
            },
        ),
        Tool(
            name="get_table_comment",
            description="""[Function Description]: Get the comment information for the specified table.
[Parameter Content]:
- table_name (string) [Required] - Name of the table to query
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
            inputSchema=table_scoped_schema(),
        ),
        Tool(
            name="get_table_column_comments",
            description="""[Function Description]: Get comment information for all columns in the specified table.
[Parameter Content]:
- table_name (string) [Required] - Name of the table to query
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
            inputSchema=table_scoped_schema(),
        ),
        Tool(
            name="get_table_indexes",
            description="""[Function Description]: Get index information for the specified table.
[Parameter Content]:
- table_name (string) [Required] - Name of the table to query
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
            inputSchema=table_scoped_schema(),
        ),
        Tool(
            name="get_recent_audit_logs",
            description="""[Function Description]: Get audit log records for a recent period.
[Parameter Content]:
- days (integer) [Optional] - Number of recent days of logs to retrieve, default is 7
- limit (integer) [Optional] - Maximum number of records to return, default is 100
""",
            inputSchema={
                "type": "object",
                "properties": {
                    "days": int_prop("Number of recent days", 7),
                    "limit": int_prop("Maximum number of records", 100),
                },
            },
        ),
        Tool(
            name="get_catalog_list",
            description="""[Function Description]: Get a list of all catalog names on the server.
[Parameter Content]:
- random_string (string) [Required] - Unique identifier for the tool call
""",
            inputSchema={
                "type": "object",
                "properties": {
                    "random_string": text_prop("Unique identifier"),
                },
                "required": ["random_string"],
            },
        ),
    ]
async def call_tool(self, name: str, arguments: Dict[str, Any]) -> str:
    """Route a tool invocation to its handler and return the outcome as JSON.

    ``list_tools()`` and ``register_tools_with_mcp()`` advertise some tools
    with an ``"[Experimental]"`` marker appended to the name. The marker is
    stripped before routing, so the advertised name and the bare name reach
    the same handler instead of the advertised one failing as unknown.

    Args:
        name: Tool name, with or without a trailing ``"[Experimental]"``.
        arguments: Argument mapping passed unchanged to the handler.

    Returns:
        A pretty-printed JSON string. On success it is the handler's result;
        when that result is a dict an ``_execution_info`` entry (tool name
        as passed in, elapsed seconds, timestamp) is added to it. On any
        failure, including an unknown tool name, it is an object with the
        keys ``error``, ``tool_name``, ``arguments`` and ``timestamp``.
    """
    experimental_marker = "[Experimental]"
    # Routing table: canonical tool name -> name of the handler method.
    handler_attrs = {
        "column_analysis": "_column_analysis_tool",
        "performance_stats": "_performance_stats_tool",
        "exec_query": "_exec_query_tool",
        "get_table_schema": "_get_table_schema_tool",
        "get_db_table_list": "_get_db_table_list_tool",
        "get_db_list": "_get_db_list_tool",
        "get_table_comment": "_get_table_comment_tool",
        "get_table_column_comments": "_get_table_column_comments_tool",
        "get_table_indexes": "_get_table_indexes_tool",
        "get_recent_audit_logs": "_get_recent_audit_logs_tool",
        "get_catalog_list": "_get_catalog_list_tool",
    }
    try:
        start_time = time.time()

        canonical_name = name
        if isinstance(name, str) and name.endswith(experimental_marker):
            canonical_name = name[: -len(experimental_marker)]

        handler_attr = handler_attrs.get(canonical_name)
        if handler_attr is None:
            raise ValueError(f"Unknown tool: {name}")
        # Resolve the handler only now so a missing one surfaces as a
        # reported error for this call rather than at lookup-table build time.
        result = await getattr(self, handler_attr)(arguments)

        execution_time = time.time() - start_time
        # Add execution information
        if isinstance(result, dict):
            result["_execution_info"] = {
                "tool_name": name,
                "execution_time": round(execution_time, 3),
                "timestamp": datetime.now().isoformat(),
            }
        return json.dumps(result, ensure_ascii=False, indent=2)
    except Exception as e:
        logger.error(f"Tool call failed {name}: {str(e)}")
        error_result = {
            "error": str(e),
            "tool_name": name,
            "arguments": arguments,
            "timestamp": datetime.now().isoformat(),
        }
        # default=str is deliberate here: reporting a failure must not fail
        # itself because the echoed arguments hold a non-JSON value.
        return json.dumps(error_result, ensure_ascii=False, indent=2, default=str)
# The following are tool routing methods, responsible for calling corresponding business logic processors
async def _column_analysis_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Column statistical analysis tool routing"""
table_name = arguments.get("table_name")
column_name = arguments.get("column_name")
analysis_type = arguments.get("analysis_type", "basic")
# Delegate to table analyzer for processing
return await self.table_analyzer.analyze_column(
table_name, column_name, analysis_type
)
async def _performance_stats_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Route a database performance statistics request to the performance monitor.

    Args:
        arguments: Tool-call arguments. Recognised keys are ``metric_type``
            (defaults to ``"queries"``) and ``time_range`` (defaults to
            ``"1h"``). Both are optional, so ``None`` is accepted and
            treated as an empty mapping.

    Returns:
        The value returned by
        ``self.performance_monitor.get_performance_stats``.
    """
    # Every parameter is optional; a call made without an arguments object
    # must fall back to the defaults instead of failing on ``None.get``.
    arguments = arguments or {}
    metric_type = arguments.get("metric_type", "queries")
    time_range = arguments.get("time_range", "1h")
    # Delegate to performance monitor for processing
    return await self.performance_monitor.get_performance_stats(
        metric_type, time_range
    )
async def _exec_query_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Route a SQL query execution request (supports federation queries).

    Reads ``sql``, ``db_name`` and ``catalog_name`` from ``arguments``
    (``None`` when a key is absent), plus ``max_rows`` (default ``100``) and
    ``timeout`` (default ``30``), and forwards them positionally to
    ``self.metadata_extractor.exec_query_for_mcp``.
    """
    lookup = arguments.get
    statement, database, catalog = [
        lookup(key) for key in ("sql", "db_name", "catalog_name")
    ]
    row_cap = lookup("max_rows", 100)
    time_budget = lookup("timeout", 30)
    # The metadata extractor performs the execution; its result is returned as-is.
    outcome = await self.metadata_extractor.exec_query_for_mcp(
        statement, database, catalog, row_cap, time_budget
    )
    return outcome
async def _get_table_schema_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Route a table schema request to the metadata extractor.

    Forwards ``table_name``, ``db_name`` and ``catalog_name`` (each ``None``
    when absent from ``arguments``) to
    ``self.metadata_extractor.get_table_schema_for_mcp``.
    """
    wanted_keys = ("table_name", "db_name", "catalog_name")
    # Collect the values first so the positional order matches the key order.
    location = [arguments.get(key) for key in wanted_keys]
    return await self.metadata_extractor.get_table_schema_for_mcp(*location)
async def _get_db_table_list_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Route a database table list request to the metadata extractor.

    Args:
        arguments: Tool-call arguments. Recognised keys are ``db_name`` and
            ``catalog_name``; each is forwarded as ``None`` when absent.
            ``None`` is accepted and treated as an empty mapping.

    Returns:
        The value returned by
        ``self.metadata_extractor.get_db_table_list_for_mcp``.
    """
    # Both parameters are optional; a call made without an arguments object
    # must behave like an empty one instead of failing on ``None.get``.
    arguments = arguments or {}
    db_name = arguments.get("db_name")
    catalog_name = arguments.get("catalog_name")
    # Delegate to metadata extractor for processing
    return await self.metadata_extractor.get_db_table_list_for_mcp(db_name, catalog_name)
async def _get_db_list_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Route a database list request to the metadata extractor.

    Args:
        arguments: Tool-call arguments. The only recognised key is
            ``catalog_name``, forwarded as ``None`` when absent. ``None`` is
            accepted and treated as an empty mapping.

    Returns:
        The value returned by ``self.metadata_extractor.get_db_list_for_mcp``.
    """
    # The single parameter is optional; a call made without an arguments
    # object must behave like an empty one instead of failing on ``None.get``.
    arguments = arguments or {}
    catalog_name = arguments.get("catalog_name")
    # Delegate to metadata extractor for processing
    return await self.metadata_extractor.get_db_list_for_mcp(catalog_name)
async def _get_table_comment_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Route a table comment request to the metadata extractor.

    Forwards ``table_name``, ``db_name`` and ``catalog_name`` (each ``None``
    when absent from ``arguments``) to
    ``self.metadata_extractor.get_table_comment_for_mcp``.
    """
    fetch = arguments.get
    table, database, catalog = fetch("table_name"), fetch("db_name"), fetch("catalog_name")
    # The extractor's result is handed back to the dispatcher unchanged.
    comment_info = await self.metadata_extractor.get_table_comment_for_mcp(
        table, database, catalog
    )
    return comment_info
async def _get_table_column_comments_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Route a table column comments request to the metadata extractor.

    Forwards ``table_name``, ``db_name`` and ``catalog_name`` (each ``None``
    when absent from ``arguments``) to
    ``self.metadata_extractor.get_table_column_comments_for_mcp``.
    """
    # Resolve the three locator values up front, in positional order.
    locator = tuple(map(arguments.get, ("table_name", "db_name", "catalog_name")))
    return await self.metadata_extractor.get_table_column_comments_for_mcp(*locator)
async def _get_table_indexes_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Route a table indexes request to the metadata extractor.

    Forwards ``table_name``, ``db_name`` and ``catalog_name`` (each ``None``
    when absent from ``arguments``) to
    ``self.metadata_extractor.get_table_indexes_for_mcp``.
    """
    target_table = arguments.get("table_name")
    target_db = arguments.get("db_name")
    target_catalog = arguments.get("catalog_name")
    # Index lookup itself lives in the metadata extractor.
    index_info = await self.metadata_extractor.get_table_indexes_for_mcp(
        target_table, target_db, target_catalog
    )
    return index_info
async def _get_recent_audit_logs_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Route a recent audit logs request to the metadata extractor.

    Args:
        arguments: Tool-call arguments. Recognised keys are ``days``
            (defaults to ``7``) and ``limit`` (defaults to ``100``). Both are
            optional, so ``None`` is accepted and treated as an empty mapping.

    Returns:
        The value returned by
        ``self.metadata_extractor.get_recent_audit_logs_for_mcp``.
    """
    # Every parameter has a default; a call made without an arguments object
    # must use those defaults instead of failing on ``None.get``.
    arguments = arguments or {}
    days = arguments.get("days", 7)
    limit = arguments.get("limit", 100)
    # Delegate to metadata extractor for processing
    return await self.metadata_extractor.get_recent_audit_logs_for_mcp(days, limit)
async def _get_catalog_list_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Route a catalog list request to the metadata extractor.

    ``arguments`` is intentionally not read: the source project declared a
    required ``random_string`` parameter for this tool, but the business
    logic never used it, so it is ignored here.
    """
    extractor = self.metadata_extractor
    catalog_listing = await extractor.get_catalog_list_for_mcp()
    return catalog_listing