22 Commits
0.3.0 ... 0.4.2

Author SHA1 Message Date
FreeOnePlus
26e8bc1149 change pipy project name 2025-06-27 12:44:57 +08:00
FreeOnePlus
8526cb75fe v0.4.2 preview 2025-06-26 20:23:54 +08:00
FreeOnePlus
97006a756d v0.4.1 preview 2025-06-26 18:55:30 +08:00
Yijia Su
72865654e2 Merge pull request #2 from echo-hhj/example
[demo]add dify demo
2025-06-23 12:44:23 +08:00
HuangHaijun
050c09f902 fix contents 2025-06-19 13:10:32 +08:00
HuangHaijun
159399bd38 fix the way to start server 2025-06-19 13:03:17 +08:00
HuangHaijun
e859fbb778 fix1 2025-06-18 12:54:14 +08:00
HuangHaijun
1b9cb29f5f fix 2025-06-18 12:53:18 +08:00
HuangHaijun
c95c0fe03c add dify demo 2025-06-18 12:44:05 +08:00
FreeOnePlus
1e2e79d90d v0.4.0 preview 2025-06-12 19:36:16 +08:00
FreeOnePlus
609816bc4a fix doc bug 2025-06-12 05:10:07 +08:00
FreeOnePlus
5d46d153e1 1. Fix DB Connection BUG
2. Modify the global default configuration items and obtain them from Config
2025-06-11 11:52:15 +08:00
FreeOnePlus
0a81d5693b add readme QA module 2025-06-10 21:28:39 +08:00
FreeOnePlus
a4306867f6 Merge remote-tracking branch 'origin/master' 2025-06-10 21:11:05 +08:00
FreeOnePlus
a22ff3ae9b add readme QA module 2025-06-10 21:04:46 +08:00
Yijia Su
2c5f26889c Update .asf.yaml
open issue,discussions
2025-06-10 14:04:14 +08:00
FreeOnePlus
e47534c296 Merge remote-tracking branch 'origin/master' 2025-06-09 23:07:54 +08:00
FreeOnePlus
0f52591259 Add pip install command & fix pyproject.toml bug 2025-06-09 22:54:01 +08:00
Yijia Su
3b429f37b3 Merge pull request #1 from iouAkira/patch-1
fix Dockerfile
2025-06-09 18:48:58 +08:00
FreeOnePlus
f5a4c8abbe Add pip install command & fix pyproject.toml bug 2025-06-09 18:42:34 +08:00
FreeOnePlus
87563ef6e1 Add pip install command 2025-06-09 18:37:41 +08:00
Akira
b6157c500b fix Dockerfile 2025-06-09 09:21:42 +08:00
34 changed files with 4402 additions and 828 deletions

View File

@@ -28,5 +28,14 @@ github:
squash: true squash: true
merge: false merge: false
rebase: false rebase: false
features:
# Enable wiki for documentation
wiki: true
# Enable issue management
issues: true
# Enable projects for project management boards
projects: true
# Enable discussions
discussions: true
notifications: notifications:
pullrequests_status: commits@doris.apache.org pullrequests_status: commits@doris.apache.org

View File

@@ -1,71 +1,90 @@
# Licensed to the Apache Software Foundation (ASF) under one # Doris MCP Server Configuration
# or more contributor license agreements. See the NOTICE file # Copy this file to .env and modify the values according to your environment
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Doris MCP Server Environment Configuration
# Copy this file to .env and modify the values as needed
# =============================================================================
# Database Configuration # Database Configuration
# =============================================================================
# Doris FE connection settings
DORIS_HOST=localhost DORIS_HOST=localhost
DORIS_PORT=9030 DORIS_PORT=9030
DORIS_USER=root DORIS_USER=root
DORIS_PASSWORD=your_password_here DORIS_PASSWORD=
DORIS_DATABASE=your_database_name DORIS_DATABASE=information_schema
# Doris FE HTTP API port
DORIS_FE_HTTP_PORT=8030
# BE nodes configuration for external access
# If DORIS_BE_HOSTS is empty, the server runs "SHOW BACKENDS" to discover BE nodes automatically
# Format: comma-separated list of BE host addresses
# Example: DORIS_BE_HOSTS=192.168.1.100,192.168.1.101,192.168.1.102
DORIS_BE_HOSTS=
# BE webserver port for HTTP APIs (memory tracker, metrics, etc.)
DORIS_BE_WEBSERVER_PORT=8040
# =============================================================================
# Connection Pool Configuration
# =============================================================================
# Connection Pool Settings
DORIS_MIN_CONNECTIONS=5 DORIS_MIN_CONNECTIONS=5
DORIS_MAX_CONNECTIONS=20 DORIS_MAX_CONNECTIONS=20
DORIS_CONNECTION_TIMEOUT=30 DORIS_CONNECTION_TIMEOUT=30
DORIS_HEALTH_CHECK_INTERVAL=60 DORIS_HEALTH_CHECK_INTERVAL=60
DORIS_MAX_CONNECTION_AGE=3600 DORIS_MAX_CONNECTION_AGE=3600
# Security Settings # =============================================================================
# Profile And Explain Max Data Size
# =============================================================================
MAX_RESPONSE_CONTENT_SIZE=4096
# =============================================================================
# Security Configuration
# =============================================================================
ENABLE_SECURITY_CHECK=true
BLOCKED_KEYWORDS="DROP,TRUNCATE,DELETE,SHUTDOWN,INSERT,UPDATE,CREATE,ALTER,GRANT,REVOKE,KILL"
AUTH_TYPE=token AUTH_TYPE=token
TOKEN_SECRET=your_256_bit_secret_key_here TOKEN_SECRET=your_secret_key_here
TOKEN_EXPIRY=3600 TOKEN_EXPIRY=3600
MAX_RESULT_ROWS=10000 MAX_RESULT_ROWS=10000
MAX_QUERY_COMPLEXITY=100
ENABLE_MASKING=true ENABLE_MASKING=true
# Performance Settings # =============================================================================
# Performance Configuration
# =============================================================================
ENABLE_QUERY_CACHE=true ENABLE_QUERY_CACHE=true
CACHE_TTL=300 CACHE_TTL=300
MAX_CACHE_SIZE=1000 MAX_CACHE_SIZE=1000
MAX_CONCURRENT_QUERIES=50 MAX_CONCURRENT_QUERIES=50
QUERY_TIMEOUT=300 QUERY_TIMEOUT=300
# =============================================================================
# Logging Configuration # Logging Configuration
LOG_LEVEL=INFO # =============================================================================
LOG_FILE_PATH=./log/doris-mcp-server.log
ENABLE_AUDIT=true LOG_LEVEL=INFO
AUDIT_FILE_PATH=./log/doris-mcp-audit.log LOG_FILE_PATH=
ENABLE_AUDIT=true
AUDIT_FILE_PATH=
# =============================================================================
# Monitoring Configuration
# =============================================================================
# Monitoring Settings
ENABLE_METRICS=true ENABLE_METRICS=true
METRICS_PORT=3001 METRICS_PORT=3001
METRICS_PATH=/metrics
HEALTH_CHECK_PORT=3002 HEALTH_CHECK_PORT=3002
HEALTH_CHECK_PATH=/health
ENABLE_ALERTS=false ENABLE_ALERTS=false
ALERT_WEBHOOK_URL= ALERT_WEBHOOK_URL=
# Server Settings # =============================================================================
SERVER_NAME=doris-mcp-server # Server Configuration
SERVER_VERSION=0.3.0 # =============================================================================
SERVER_PORT=3000
# Development Settings (for development environment only) SERVER_NAME=doris-mcp-server
DEBUG=false SERVER_VERSION=0.4.1
VERBOSE=false SERVER_PORT=3000

View File

@@ -47,7 +47,7 @@ COPY . .
RUN mkdir -p /app/logs /app/config /app/data RUN mkdir -p /app/logs /app/config /app/data
# Set permissions # Set permissions
RUN chmod +x /app/start.sh RUN chmod +x /app/start_server.sh
# Create non-root user # Create non-root user
RUN groupadd -r doris && useradd -r -g doris doris RUN groupadd -r doris && useradd -r -g doris doris
@@ -62,4 +62,4 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
EXPOSE 3000 3001 3002 EXPOSE 3000 3001 3002
# Start command # Start command
CMD ["/app/start.sh"] CMD ["/app/start_server.sh"]

499
README.md
View File

@@ -21,44 +21,123 @@ under the License.
Doris MCP (Model Context Protocol) Server is a backend service built with Python and FastAPI. It implements the MCP, allowing clients to interact with it through defined "Tools". It's primarily designed to connect to Apache Doris databases, potentially leveraging Large Language Models (LLMs) for tasks like converting natural language queries to SQL (NL2SQL), executing queries, and performing metadata management and analysis. Doris MCP (Model Context Protocol) Server is a backend service built with Python and FastAPI. It implements the MCP, allowing clients to interact with it through defined "Tools". It's primarily designed to connect to Apache Doris databases, potentially leveraging Large Language Models (LLMs) for tasks like converting natural language queries to SQL (NL2SQL), executing queries, and performing metadata management and analysis.
## 🚀 What's New in v0.3.0 ## 🚀 What's New in v0.4.2
- **🔄 Streamlined Communication**: Completely migrated from SSE to Streamable HTTP for better performance and reliability - **🔒 Enhanced Security Framework**: Comprehensive SQL security validation with configurable blocked keywords, SQL injection protection, and unified security configuration management
- **🏗 Unified Architecture**: Consolidated tools management with centralized registration and routing - **🛠 Connection Stability Improvements**: Fixed critical `at_eof` connection errors with advanced connection health monitoring, automatic retry mechanisms, and proactive connection cleanup
- **⚡ Enhanced Performance**: Improved query execution with advanced caching and optimization - **⚙️ Flexible Security Configuration**: Environment variable support for security policies (`BLOCKED_KEYWORDS`, `ENABLE_SECURITY_CHECK`) with unified configuration architecture eliminating code duplication
- **🔒 Enterprise Security**: Added comprehensive security management with SQL validation and data masking - **🎯 Centralized Configuration Management**: All security keywords now managed through single configuration source with consistent enforcement across all components
- **📊 Advanced Analytics**: New column analysis and performance monitoring tools - **🔧 MCP Version Compatibility**: Resolved MCP library version conflicts with intelligent compatibility layer supporting both MCP 1.8.x and 1.9.x versions
- **🛠️ Simplified Development**: Streamlined tool development process with unified interfaces - **🚀 Production Reliability**: Enhanced error handling, connection diagnostics, and automatic recovery from database connection issues
- **🙏 Community Contribution**: Special thanks to Hailin Xie for supporting the doris-mcp-server project by graciously transferring the PyPI project to the community free of charge as a contribution to open source. The mcp-doris-server repository will be retained but no longer maintained; ongoing development continues in the doris-mcp-server repository.
> **⚠️ Breaking Changes**: SSE endpoints have been removed. Please update your client configurations to use Streamable HTTP (`/mcp` endpoint). > **🔧 Key Improvements**: Resolved connection stability issues, unified security keyword management, added comprehensive environment variable configuration for security policies, and fixed MCP library version compatibility conflicts.
## Core Features ## Core Features
* **MCP Protocol Implementation**: Provides standard MCP interfaces, supporting tool calls, resource management, and prompt interactions. * **MCP Protocol Implementation**: Provides standard MCP interfaces, supporting tool calls, resource management, and prompt interactions.
* **Multiple Communication Modes** (Updated in v0.3.0): * **Streamable HTTP Communication**: Unified HTTP endpoint supporting both request/response and streaming communication for optimal performance and reliability.
* **Stdio**: Standard input/output mode for direct integration with MCP clients like Cursor. * **Stdio Communication**: Standard input/output mode for direct integration with MCP clients like Cursor.
* **Streamable HTTP**: Unified HTTP endpoint supporting request/response and streaming (Primary mode since v0.3.0).
> **⚠️ Breaking Change in v0.3.0**: SSE (Server-Sent Events) mode has been completely removed in favor of the more robust Streamable HTTP implementation.
* **Enterprise-Grade Architecture**: Modular design with comprehensive functionality: * **Enterprise-Grade Architecture**: Modular design with comprehensive functionality:
* **Tools Manager**: Centralized tool registration and routing (`doris_mcp_server/tools/tools_manager.py`) * **Tools Manager**: Centralized tool registration and routing with unified interfaces (`doris_mcp_server/tools/tools_manager.py`)
* **Enhanced Monitoring Tools Module**: Advanced memory tracking, metrics collection, and flexible BE node discovery with modular, extensible design
* **Query Information Tools**: Enhanced SQL explain and profiling with configurable content truncation, file export for LLM attachments, and advanced query analytics
* **Resources Manager**: Resource management and metadata exposure (`doris_mcp_server/tools/resources_manager.py`) * **Resources Manager**: Resource management and metadata exposure (`doris_mcp_server/tools/resources_manager.py`)
* **Prompts Manager**: Intelligent prompt templates for data analysis (`doris_mcp_server/tools/prompts_manager.py`) * **Prompts Manager**: Intelligent prompt templates for data analysis (`doris_mcp_server/tools/prompts_manager.py`)
* **Advanced Database Features**: * **Advanced Database Features**:
* **Query Execution**: High-performance SQL execution with caching and optimization (`doris_mcp_server/utils/query_executor.py`) * **Query Execution**: High-performance SQL execution with advanced caching and optimization, enhanced connection stability and automatic retry mechanisms (`doris_mcp_server/utils/query_executor.py`)
* **Security Management**: SQL security validation, data masking, and access control (`doris_mcp_server/utils/security.py`) * **Security Management**: Comprehensive SQL security validation with configurable blocked keywords, SQL injection protection, data masking, and unified security configuration management (`doris_mcp_server/utils/security.py`)
* **Metadata Extraction**: Comprehensive database metadata with catalog federation support (`doris_mcp_server/utils/schema_extractor.py`) * **Metadata Extraction**: Comprehensive database metadata with catalog federation support (`doris_mcp_server/utils/schema_extractor.py`)
* **Performance Analysis**: Column statistics, performance monitoring, and data analysis tools (`doris_mcp_server/utils/analysis_tools.py`) * **Performance Analysis**: Advanced column analysis, performance monitoring, and data analysis tools (`doris_mcp_server/utils/analysis_tools.py`)
* **Catalog Federation Support**: Full support for multi-catalog environments (internal Doris tables and external data sources like Hive, MySQL, etc.) * **Catalog Federation Support**: Full support for multi-catalog environments (internal Doris tables and external data sources like Hive, MySQL, etc.)
* **Enterprise Security**: Comprehensive security framework with authentication, authorization, SQL injection protection, and data masking (`doris_mcp_server/utils/security.py`) * **Enterprise Security**: Comprehensive security framework with authentication, authorization, SQL injection protection, and data masking capabilities with environment variable configuration support
* **Flexible Configuration**: Comprehensive configuration management with environment variables, file-based config, and validation (`doris_mcp_server/utils/config.py`) * **Unified Configuration Framework**: Centralized configuration management through `config.py` with comprehensive validation, standardized parameter naming, and smart default database handling with automatic fallback to `information_schema`
## System Requirements ## System Requirements
* Python 3.12+ * Python 3.12+
* Database connection details (e.g., Doris Host, Port, User, Password, Database) * Database connection details (e.g., Doris Host, Port, User, Password, Database)
## Quick Start ## 🚀 Quick Start
### Installation from PyPI
```bash
# Install the latest version
pip install doris-mcp-server
# Install specific version
pip install doris-mcp-server==0.4.2
```
> **💡 Command Compatibility**: After installation, both `doris-mcp-server` commands are available for backward compatibility. You can use either command interchangeably.
### Start Streamable HTTP Mode (Web Service)
The primary communication mode offering optimal performance and reliability:
```bash
# Full configuration with database connection
doris-mcp-server \
--transport http \
--host 0.0.0.0 \
--port 3000 \
--db-host 127.0.0.1 \
--db-port 9030 \
--db-user root \
--db-password your_password
```
### Start Stdio Mode (for Cursor and other MCP clients)
Standard input/output mode for direct integration with MCP clients:
```bash
# For direct integration with MCP clients like Cursor
doris-mcp-server --transport stdio
```
### Verify Installation
```bash
# Check installation
doris-mcp-server --help
# Test HTTP mode (in another terminal)
curl http://localhost:3000/health
```
### Environment Variables (Optional)
Instead of command-line arguments, you can use environment variables:
```bash
export DORIS_HOST="127.0.0.1"
export DORIS_PORT="9030"
export DORIS_USER="root"
export DORIS_PASSWORD="your_password"
# Then start with simplified command
doris-mcp-server --transport http --host 0.0.0.0 --port 3000
```
### Command Line Arguments
The `doris-mcp-server` command supports the following arguments:
| Argument | Description | Default | Required |
|:---------|:------------|:--------|:---------|
| `--transport` | Transport mode: `http` or `stdio` | `http` | No |
| `--host` | HTTP server host (HTTP mode only) | `0.0.0.0` | No |
| `--port` | HTTP server port (HTTP mode only) | `3000` | No |
| `--db-host` | Doris database host | `localhost` | No |
| `--db-port` | Doris database port | `9030` | No |
| `--db-user` | Doris database username | `root` | No |
| `--db-password` | Doris database password | - | Yes (unless in env) |
## Development Setup
For developers who want to build from source:
### 1. Clone the Repository ### 1. Clone the Repository
@@ -76,10 +155,10 @@ pip install -r requirements.txt
### 3. Configure Environment Variables ### 3. Configure Environment Variables
Copy the `env.example` file to `.env` and modify the settings according to your environment: Copy the `.env.example` file to `.env` and modify the settings according to your environment:
```bash ```bash
cp env.example .env cp .env.example .env
``` ```
**Key Environment Variables:** **Key Environment Variables:**
@@ -89,18 +168,23 @@ cp env.example .env
* `DORIS_PORT`: Database port (default: 9030) * `DORIS_PORT`: Database port (default: 9030)
* `DORIS_USER`: Database username (default: root) * `DORIS_USER`: Database username (default: root)
* `DORIS_PASSWORD`: Database password * `DORIS_PASSWORD`: Database password
* `DORIS_DATABASE`: Default database name (default: test) * `DORIS_DATABASE`: Default database name (default: information_schema)
* `DORIS_MIN_CONNECTIONS`: Minimum connection pool size (default: 5) * `DORIS_MIN_CONNECTIONS`: Minimum connection pool size (default: 5)
* `DORIS_MAX_CONNECTIONS`: Maximum connection pool size (default: 20) * `DORIS_MAX_CONNECTIONS`: Maximum connection pool size (default: 20)
* `DORIS_BE_HOSTS`: BE nodes for monitoring (comma-separated, optional - auto-discovery via SHOW BACKENDS if empty)
* `DORIS_BE_WEBSERVER_PORT`: BE webserver port for monitoring tools (default: 8040)
* **Security Configuration**: * **Security Configuration**:
* `AUTH_TYPE`: Authentication type (token/basic/oauth, default: token) * `AUTH_TYPE`: Authentication type (token/basic/oauth, default: token)
* `TOKEN_SECRET`: Token secret key * `TOKEN_SECRET`: Token secret key
* `ENABLE_SECURITY_CHECK`: Enable/disable SQL security validation (default: true, New in v0.4.2)
* `BLOCKED_KEYWORDS`: Comma-separated list of blocked SQL keywords (New in v0.4.2)
* `ENABLE_MASKING`: Enable data masking (default: true) * `ENABLE_MASKING`: Enable data masking (default: true)
* `MAX_RESULT_ROWS`: Maximum result rows (default: 10000) * `MAX_RESULT_ROWS`: Maximum result rows (default: 10000)
* **Performance Configuration**: * **Performance Configuration**:
* `ENABLE_QUERY_CACHE`: Enable query caching (default: true) * `ENABLE_QUERY_CACHE`: Enable query caching (default: true)
* `CACHE_TTL`: Cache time-to-live in seconds (default: 300) * `CACHE_TTL`: Cache time-to-live in seconds (default: 300)
* `MAX_CONCURRENT_QUERIES`: Maximum concurrent queries (default: 50) * `MAX_CONCURRENT_QUERIES`: Maximum concurrent queries (default: 50)
* `MAX_RESPONSE_CONTENT_SIZE`: Maximum response content size for LLM compatibility (default: 4096, New in v0.4.0)
* **Logging Configuration**: * **Logging Configuration**:
* `LOG_LEVEL`: Log level (DEBUG/INFO/WARNING/ERROR, default: INFO) * `LOG_LEVEL`: Log level (DEBUG/INFO/WARNING/ERROR, default: INFO)
* `LOG_FILE_PATH`: Log file path * `LOG_FILE_PATH`: Log file path
@@ -110,21 +194,26 @@ cp env.example .env
The following table lists the main tools currently available for invocation via an MCP client: The following table lists the main tools currently available for invocation via an MCP client:
| Tool Name | Description | Parameters | Status | | Tool Name | Description | Parameters |
|:----------------------------| :---------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------- | :------- | |-----------------------------|--------------------------------------------------------------|--------------------------------------------------------------|
| `exec_query` | Execute SQL query with catalog federation support. | `sql` (string, Required - MUST use three-part naming), `db_name` (string, Optional), `catalog_name` (string, Optional), `max_rows` (integer, Optional, default 100), `timeout` (integer, Optional, default 30) | ✅ Active | | `exec_query` | Execute SQL query and return results. | `sql` (string, Required), `db_name` (string, Optional), `catalog_name` (string, Optional), `max_rows` (integer, Optional), `timeout` (integer, Optional) |
| `get_catalog_list` | Get a list of all catalogs with detailed information. | `random_string` (string, Required) | ✅ Active | | `get_table_schema` | Get detailed table structure information. | `table_name` (string, Required), `db_name` (string, Optional), `catalog_name` (string, Optional) |
| `get_db_list` | Get a list of all database names in the specified catalog. | `catalog_name` (string, Optional, defaults to internal catalog) | ✅ Active | | `get_db_table_list` | Get list of all table names in specified database. | `db_name` (string, Optional), `catalog_name` (string, Optional) |
| `get_db_table_list` | Get a list of all table names in the specified database. | `db_name` (string, Optional), `catalog_name` (string, Optional) | ✅ Active | | `get_db_list` | Get list of all database names. | `catalog_name` (string, Optional) |
| `get_table_schema` | Get detailed structure of the specified table. | `table_name` (string, Required), `db_name` (string, Optional), `catalog_name` (string, Optional) | ✅ Active | | `get_table_comment` | Get table comment information. | `table_name` (string, Required), `db_name` (string, Optional), `catalog_name` (string, Optional) |
| `get_table_comment` | Get the comment for the specified table. | `table_name` (string, Required), `db_name` (string, Optional), `catalog_name` (string, Optional) | ✅ Active | | `get_table_column_comments` | Get comment information for all columns in table. | `table_name` (string, Required), `db_name` (string, Optional), `catalog_name` (string, Optional) |
| `get_table_column_comments` | Get comments for all columns in the specified table. | `table_name` (string, Required), `db_name` (string, Optional), `catalog_name` (string, Optional) | ✅ Active | | `get_table_indexes` | Get index information for specified table. | `table_name` (string, Required), `db_name` (string, Optional), `catalog_name` (string, Optional) |
| `get_table_indexes` | Get index information for the specified table. | `table_name` (string, Required), `db_name` (string, Optional), `catalog_name` (string, Optional) | ✅ Active | | `get_recent_audit_logs` | Get audit log records for recent period. | `days` (integer, Optional), `limit` (integer, Optional) |
| `get_recent_audit_logs` | Get audit log records for a recent period. | `days` (integer, Optional, default 7), `limit` (integer, Optional, default 100) | ✅ Active | | `get_catalog_list` | Get list of all catalog names. | `random_string` (string, Required) |
| `column_analysis` | Analyze statistical information and data distribution. | `table_name` (string, Required), `column_name` (string, Required), `analysis_type` (string, Optional: basic/distribution/detailed) | ⚠️ Experimental | | `get_sql_explain` | Get SQL execution plan with configurable content truncation and file export for LLM analysis. | `sql` (string, Required), `verbose` (boolean, Optional), `db_name` (string, Optional), `catalog_name` (string, Optional) |
| `performance_stats` | Get database performance statistics information. | `metric_type` (string, Optional: queries/connections/tables/system), `time_range` (string, Optional: 1h/6h/24h/7d) | ⚠️ Experimental | | `get_sql_profile` | Get SQL execution profile with content management and file export for LLM optimization workflows. | `sql` (string, Required), `db_name` (string, Optional), `catalog_name` (string, Optional), `timeout` (integer, Optional) |
| `get_table_data_size` | Get table data size information via FE HTTP API. | `db_name` (string, Optional), `table_name` (string, Optional), `single_replica` (boolean, Optional) |
| `get_monitoring_metrics_info` | Get Doris monitoring metrics definitions and descriptions. | `role` (string, Optional), `monitor_type` (string, Optional), `priority` (string, Optional) |
| `get_monitoring_metrics_data` | Get actual Doris monitoring metrics data from nodes with flexible BE discovery. | `role` (string, Optional), `monitor_type` (string, Optional), `priority` (string, Optional) |
| `get_realtime_memory_stats` | Get real-time memory statistics via BE Memory Tracker with auto/manual BE discovery. | `tracker_type` (string, Optional), `include_details` (boolean, Optional) |
| `get_historical_memory_stats` | Get historical memory statistics via BE Bvar interface with flexible BE configuration. | `tracker_names` (array, Optional), `time_range` (string, Optional) |
**Note:** All metadata tools support catalog federation for multi-catalog environments. The `get_catalog_list` tool requires a `random_string` parameter for compatibility reasons. **Note:** All metadata tools support catalog federation for multi-catalog environments. The `get_catalog_list` tool requires a `random_string` parameter for compatibility reasons. Enhanced monitoring tools in v0.4.0 provide comprehensive memory tracking and metrics collection capabilities with flexible BE node discovery.
### 4. Run the Service ### 4. Run the Service
@@ -136,19 +225,18 @@ Execute the following command to start the server:
This command starts the FastAPI application with Streamable HTTP MCP service. This command starts the FastAPI application with Streamable HTTP MCP service.
**Service Endpoints (v0.3.0+):** **Service Endpoints:**
* **Streamable HTTP**: `http://<host>:<port>/mcp` (Primary MCP endpoint - supports GET, POST, DELETE, OPTIONS) * **Streamable HTTP**: `http://<host>:<port>/mcp` (Primary MCP endpoint - supports GET, POST, DELETE, OPTIONS)
* **Health Check**: `http://<host>:<port>/health` * **Health Check**: `http://<host>:<port>/health`
* **Status Check**: `http://<host>:<port>/status`
> **Note**: Starting from v0.3.0, only Streamable HTTP mode is supported for web-based communication. SSE endpoints have been removed. > **Note**: The server uses Streamable HTTP for web-based communication, providing unified request/response and streaming capabilities.
## Usage ## Usage
Interaction with the Doris MCP Server requires an **MCP Client**. The client connects to the server's Streamable HTTP endpoint and sends requests according to the MCP specification to invoke the server's tools. Interaction with the Doris MCP Server requires an **MCP Client**. The client connects to the server's Streamable HTTP endpoint and sends requests according to the MCP specification to invoke the server's tools.
**Main Interaction Flow (v0.3.0+):** **Main Interaction Flow:**
1. **Client Initialization**: Send an `initialize` method call to `/mcp` (Streamable HTTP). 1. **Client Initialization**: Send an `initialize` method call to `/mcp` (Streamable HTTP).
2. **(Optional) Discover Tools**: The client can call `tools/list` to get the list of supported tools, their descriptions, and parameter schemas. 2. **(Optional) Discover Tools**: The client can call `tools/list` to get the list of supported tools, their descriptions, and parameter schemas.
@@ -160,8 +248,6 @@ Interaction with the Doris MCP Server requires an **MCP Client**. The client con
* **Non-streaming**: The client receives a response containing `content` or `isError`. * **Non-streaming**: The client receives a response containing `content` or `isError`.
* **Streaming**: The client receives a series of progress notifications, followed by a final response. * **Streaming**: The client receives a series of progress notifications, followed by a final response.
> **Migration Note**: If you're upgrading from v0.2.x, note that tool names have been simplified (removed `mcp_doris_` prefix) and the communication protocol has been updated to use Streamable HTTP exclusively.
### Catalog Federation Support ### Catalog Federation Support
The Doris MCP Server supports **catalog federation**, enabling interaction with multiple data catalogs (internal Doris tables and external data sources like Hive, MySQL, etc.) within a unified interface. The Doris MCP Server supports **catalog federation**, enabling interaction with multiple data catalogs (internal Doris tables and external data sources like Hive, MySQL, etc.) within a unified interface.
@@ -230,7 +316,7 @@ The Doris MCP Server supports **catalog federation**, enabling interaction with
} }
``` ```
## Security Configuration (v0.3.0+) ## Security Configuration
The Doris MCP Server includes a comprehensive security framework that provides enterprise-level protection through authentication, authorization, SQL security validation, and data masking capabilities. The Doris MCP Server includes a comprehensive security framework that provides enterprise-level protection through authentication, authorization, SQL security validation, and data masking capabilities.
@@ -322,16 +408,25 @@ The system automatically validates SQL queries for security risks:
#### Blocked Operations #### Blocked Operations
Configure blocked SQL operations: Configure blocked SQL operations using environment variables (New in v0.4.2):
```bash ```bash
# Environment variable # Enable/disable SQL security check (New in v0.4.2)
BLOCKED_SQL_OPERATIONS=DROP,DELETE,TRUNCATE,ALTER,CREATE,INSERT,UPDATE,GRANT,REVOKE ENABLE_SECURITY_CHECK=true
# Customize blocked keywords via environment variable (New in v0.4.2)
BLOCKED_KEYWORDS="DROP,DELETE,TRUNCATE,ALTER,CREATE,INSERT,UPDATE,GRANT,REVOKE,EXEC,EXECUTE,SHUTDOWN,KILL"
# Maximum query complexity score # Maximum query complexity score
MAX_QUERY_COMPLEXITY=100 MAX_QUERY_COMPLEXITY=100
``` ```
**Default Blocked Keywords (Unified in v0.4.2):**
- **DDL Operations**: DROP, CREATE, ALTER, TRUNCATE
- **DML Operations**: DELETE, INSERT, UPDATE
- **DCL Operations**: GRANT, REVOKE
- **System Operations**: EXEC, EXECUTE, SHUTDOWN, KILL
#### SQL Injection Protection #### SQL Injection Protection
The system automatically detects and blocks: The system automatically detects and blocks:
@@ -481,9 +576,36 @@ You can connect Cursor to this MCP server using Stdio mode (recommended) or Stre
Stdio mode allows Cursor to manage the server process directly. Configuration is done within Cursor's MCP Server settings file (typically `~/.cursor/mcp.json` or similar). Stdio mode allows Cursor to manage the server process directly. Configuration is done within Cursor's MCP Server settings file (typically `~/.cursor/mcp.json` or similar).
### Using uv (Recommended) ### Method 1: Using PyPI Installation (Recommended)
If you have `uv` installed, you can run the server directly: Install the package from PyPI and configure Cursor to use it:
```bash
pip install mcp-doris-server
```
**Configure Cursor:** Add an entry like the following to your Cursor MCP configuration:
```json
{
"mcpServers": {
"doris-stdio": {
"command": "doris-mcp-server",
"args": ["--transport", "stdio"],
"env": {
"DORIS_HOST": "127.0.0.1",
"DORIS_PORT": "9030",
"DORIS_USER": "root",
"DORIS_PASSWORD": "your_db_password"
}
}
}
}
```
### Method 2: Using uv (Development)
If you have `uv` installed and want to run from source:
```bash ```bash
uv run --project /path/to/doris-mcp-server doris-mcp-server uv run --project /path/to/doris-mcp-server doris-mcp-server
@@ -491,7 +613,7 @@ uv run --project /path/to/doris-mcp-server doris-mcp-server
**Note:** Replace `/path/to/doris-mcp-server` with the actual absolute path to your project directory. **Note:** Replace `/path/to/doris-mcp-server` with the actual absolute path to your project directory.
1. **Configure Cursor:** Add an entry like the following to your Cursor MCP configuration: **Configure Cursor:** Add an entry like the following to your Cursor MCP configuration:
```json ```json
{ {
@@ -503,22 +625,14 @@ uv run --project /path/to/doris-mcp-server doris-mcp-server
"DORIS_HOST": "127.0.0.1", "DORIS_HOST": "127.0.0.1",
"DORIS_PORT": "9030", "DORIS_PORT": "9030",
"DORIS_USER": "root", "DORIS_USER": "root",
"DORIS_PASSWORD": "your_db_password", "DORIS_PASSWORD": "your_db_password"
"DORIS_DATABASE": "your_default_db",
"LOG_LEVEL": "INFO"
} }
} }
} }
} }
``` ```
2. **Key Points:** ### Streamable HTTP Mode
* Replace `/path/to/your/doris-mcp-server` with the actual absolute path to the project's root directory on your system.
* The `--project` argument is crucial for `uv` to find the `pyproject.toml` and run the correct command.
* Database connection details are set directly in the `env` block. Cursor will pass these to the server process.
* No `.env` file is needed for this mode when configured via Cursor.
### Streamable HTTP Mode (v0.3.0+)
Streamable HTTP mode requires you to run the MCP server independently first, and then configure Cursor to connect to it. Streamable HTTP mode requires you to run the MCP server independently first, and then configure Cursor to connect to it.
@@ -540,12 +654,10 @@ Streamable HTTP mode requires you to run the MCP server independently first, and
} }
``` ```
> **Note**: Adjust the host/port if your server runs on a different address. The `/mcp` endpoint is the unified Streamable HTTP interface introduced in v0.3.0. > **Note**: Adjust the host/port if your server runs on a different address. The `/mcp` endpoint is the unified Streamable HTTP interface.
After configuring either mode in Cursor, you should be able to select the server (e.g., `doris-stdio` or `doris-http`) and use its tools. After configuring either mode in Cursor, you should be able to select the server (e.g., `doris-stdio` or `doris-http`) and use its tools.
> **⚠️ Migration from v0.2.x**: If you were using SSE mode (`/sse` endpoint), update your configuration to use the new Streamable HTTP endpoint (`/mcp`).
## Directory Structure ## Directory Structure
``` ```
@@ -584,22 +696,22 @@ doris-mcp-server/
## Developing New Tools ## Developing New Tools
This section outlines the process for adding new MCP tools to the Doris MCP Server, based on the current modular architecture. This section outlines the process for adding new MCP tools to the Doris MCP Server, based on the unified modular architecture with centralized tool management.
### 1. Leverage Existing Utility Modules ### 1. Leverage Existing Utility Modules
The server provides comprehensive utility modules for common database operations: The server provides comprehensive utility modules for common database operations:
* **`doris_mcp_server/utils/db.py`**: Database connection management with connection pooling and health monitoring. * **`doris_mcp_server/utils/db.py`**: Database connection management with connection pooling and health monitoring.
* **`doris_mcp_server/utils/query_executor.py`**: High-performance SQL execution with caching, optimization, and performance monitoring. * **`doris_mcp_server/utils/query_executor.py`**: High-performance SQL execution with advanced caching, optimization, and performance monitoring.
* **`doris_mcp_server/utils/schema_extractor.py`**: Metadata extraction with full catalog federation support. * **`doris_mcp_server/utils/schema_extractor.py`**: Metadata extraction with full catalog federation support.
* **`doris_mcp_server/utils/security.py`**: Security management, SQL validation, and data masking. * **`doris_mcp_server/utils/security.py`**: Comprehensive security management, SQL validation, and data masking.
* **`doris_mcp_server/utils/analysis_tools.py`**: Data analysis and statistical tools. * **`doris_mcp_server/utils/analysis_tools.py`**: Advanced data analysis and statistical tools.
* **`doris_mcp_server/utils/config.py`**: Configuration management with validation. * **`doris_mcp_server/utils/config.py`**: Configuration management with validation.
### 2. Implement Tool Logic ### 2. Implement Tool Logic
Add your new tool to the `DorisToolsManager` class in `doris_mcp_server/tools/tools_manager.py`. The tools manager provides a centralized approach to tool registration and execution. Add your new tool to the `DorisToolsManager` class in `doris_mcp_server/tools/tools_manager.py`. The tools manager provides a centralized approach to tool registration and execution with unified interfaces.
**Example:** Adding a new analysis tool: **Example:** Adding a new analysis tool:
@@ -668,12 +780,13 @@ async def your_new_analysis_tool_wrapper(arguments: Dict[str, Any]) -> List[Dict
### 4. Advanced Features ### 4. Advanced Features
For more complex tools, you can leverage: For more complex tools, you can leverage the comprehensive framework:
* **Caching**: Use the query executor's built-in caching for performance * **Advanced Caching**: Use the query executor's built-in caching for enhanced performance
* **Security**: Apply SQL validation and data masking through the security manager * **Enterprise Security**: Apply comprehensive SQL validation and data masking through the security manager
* **Prompts**: Use the prompts manager for intelligent query generation * **Intelligent Prompts**: Use the prompts manager for advanced query generation
* **Resources**: Expose metadata through the resources manager * **Resource Management**: Expose metadata through the resources manager
* **Performance Monitoring**: Integrate with the analysis tools for monitoring capabilities
### 5. Testing ### 5. Testing
@@ -704,4 +817,242 @@ Contributions are welcome via Issues or Pull Requests.
## License ## License
This project is licensed under the Apache 2.0 License. See the LICENSE file (if it exists) for details. This project is licensed under the Apache 2.0 License. See the LICENSE file for details.
## FAQ
### Q: Why do Qwen3-32b and other small parameter models always fail when calling tools?
**A:** This is a common issue. The main reason is that these models need more explicit guidance to correctly use MCP tools. It's recommended to add the following instruction prompt for the model:
- Chinese version
```xml
<instruction>
尽可能使用MCP工具完成任务,仔细阅读每个工具的注解、方法名、参数说明等内容。请按照以下步骤操作:
1. 仔细分析用户的问题,从已有的Tools列表中匹配最合适的工具。
2. 确保工具名称、方法名和参数完全按照工具注释中的定义使用,不要自行创造工具名称或参数。
3. 传入参数时,严格遵循工具注释中规定的参数格式和要求。
4. 调用工具时,根据需要直接调用工具,但参数请求参考以下请求格式:{"mcp_sse_call_tool": {"tool_name": "$tools_name", "arguments": "{}"}}
5. 输出结果时不要包含任何XML标签,仅返回纯文本内容。
<input>
用户问题:user_query
</input>
<output>
返回工具调用结果或最终答案,以及对结果的分析。
</output>
</instruction>
```
- English version
```xml
<instruction>
Use MCP tools to complete tasks as much as possible. Carefully read the annotations, method names, and parameter descriptions of each tool. Please follow these steps:
1. Carefully analyze the user's question and match the most appropriate tool from the existing Tools list.
2. Ensure tool names, method names, and parameters are used exactly as defined in the tool annotations. Do not create tool names or parameters on your own.
3. When passing parameters, strictly follow the parameter format and requirements specified in the tool annotations.
4. When calling tools, call them directly as needed, but refer to the following request format for parameters: {"mcp_sse_call_tool": {"tool_name": "$tools_name", "arguments": "{}"}}
5. When outputting results, do not include any XML tags, return plain text content only.
<input>
User question: user_query
</input>
<output>
Return tool call results or final answer, along with analysis of the results.
</output>
</instruction>
```
If you have further requirements for the returned results, you can describe the specific requirements in the `<output>` tag.
### Q: How to configure different database connections?
**A:** You can configure database connections in several ways:
1. **Environment Variables** (Recommended):
```bash
export DORIS_HOST="your_doris_host"
export DORIS_PORT="9030"
export DORIS_USER="root"
export DORIS_PASSWORD="your_password"
```
2. **Command Line Arguments**:
```bash
doris-mcp-server --db-host your_host --db-port 9030 --db-user root --db-password your_password
```
3. **Configuration File**:
Modify the corresponding configuration items in the `.env` file.
### Q: How to configure BE nodes for monitoring tools?
**A:** Choose the appropriate configuration based on your deployment scenario:
**External Network (Manual Configuration):**
```bash
# Manually specify BE node addresses
DORIS_BE_HOSTS=10.1.1.100,10.1.1.101,10.1.1.102
DORIS_BE_WEBSERVER_PORT=8040
```
**Internal Network (Automatic Discovery):**
```bash
# Leave BE_HOSTS empty for auto-discovery
# DORIS_BE_HOSTS= # Not set or empty
# System will use 'SHOW BACKENDS' command to get internal IPs
```
### Q: How to use SQL Explain/Profile files with LLM for optimization?
**A:** The tools provide both truncated content and complete files for LLM analysis:
1. **Get Analysis Results:**
```json
{
"content": "Truncated plan for immediate review",
"file_path": "/tmp/explain_12345.txt",
"is_content_truncated": true
}
```
2. **LLM Analysis Workflow:**
- Review truncated content for quick insights
- Upload the complete file to your LLM as an attachment
- Request optimization suggestions or performance analysis
- Implement recommended improvements
3. **Configure Content Size:**
```bash
MAX_RESPONSE_CONTENT_SIZE=4096 # Adjust as needed
```
### Q: How to enable data security and masking features?
**A:** Set the following configurations in your `.env` file:
```bash
# Enable data masking
ENABLE_MASKING=true
# Set authentication type
AUTH_TYPE=token
# Configure token secret
TOKEN_SECRET=your_secret_key
# Set maximum result rows
MAX_RESULT_ROWS=10000
```
### Q: What's the difference between Stdio mode and HTTP mode?
**A:**
- **Stdio Mode**: Suitable for direct integration with MCP clients (like Cursor), where the client manages the server process
- **HTTP Mode**: Independent web service that supports multiple client connections, suitable for production environments
Recommendations:
- Development and personal use: Stdio mode
- Production and multi-user environments: HTTP mode
### Q: How to resolve connection timeout issues?
**A:** Try the following solutions:
1. **Increase timeout settings**:
```bash
# Set in .env file
QUERY_TIMEOUT=60
CONNECTION_TIMEOUT=30
```
2. **Check network connectivity**:
```bash
# Check that the MCP server is reachable via its HTTP health endpoint
curl http://localhost:3000/health
```
3. **Optimize connection pool configuration**:
```bash
DORIS_MIN_CONNECTIONS=5
DORIS_MAX_CONNECTIONS=20
```
### Q: How to resolve `at_eof` connection errors? (Fixed in v0.4.2)
**A:** Version 0.4.2 has resolved the critical `at_eof` connection errors. The improvements include:
1. **Enhanced Connection Health Monitoring**: Strict connection state validation before operations
2. **Automatic Retry Mechanism**: Failed queries are automatically retried up to 2 times
3. **Proactive Connection Cleanup**: Automatic detection and cleanup of problematic connections
4. **Connection Diagnostics**: Comprehensive connection health analysis and reporting
If you still encounter connection issues after upgrading to v0.4.2:
```bash
# Check connection diagnostics
# The system now automatically handles connection recovery
# Monitor logs for connection health reports
tail -f logs/doris_mcp_server.log | grep "connection"
```
### Q: How to resolve MCP library version compatibility issues? (Fixed in v0.4.2)
**A:** Version 0.4.2 introduced an intelligent MCP compatibility layer that supports both MCP 1.8.x and 1.9.x versions:
**The Problem:**
- MCP 1.9.3 introduced breaking changes to the `RequestContext` class (changed from 2 to 3 generic parameters)
- This caused `TypeError: Too few arguments for RequestContext` errors
**The Solution (v0.4.2):**
- **Intelligent Version Detection**: Automatically detects the installed MCP version
- **Compatibility Layer**: Gracefully handles API differences between versions
- **Flexible Version Support**: `mcp>=1.8.0,<2.0.0` in dependencies
**Supported MCP Versions:**
```bash
# Both versions now work seamlessly
pip install mcp==1.8.0 # Stable version (recommended)
pip install mcp==1.9.3 # Latest version with new features
```
**Version Information:**
```bash
# Check which MCP version is being used
doris-mcp-server --transport stdio
# The server will log: "Using MCP version: x.x.x"
```
If you encounter MCP-related startup errors:
```bash
# Recommended: Use stable version
pip uninstall mcp
pip install mcp==1.8.0
# Or upgrade to latest compatible version
pip install --upgrade mcp-doris-server==0.4.2
```
### Q: How to view server logs?
**A:** Log files are located in the `logs/` directory. You can:
1. **View real-time logs**:
```bash
tail -f logs/doris_mcp_server.log
```
2. **Adjust log level**:
```bash
# Set in .env file
LOG_LEVEL=DEBUG
```
3. **Enable audit logging**:
```bash
ENABLE_AUDIT=true
```
For other issues, please check GitHub Issues or submit a new issue.

View File

@@ -133,9 +133,6 @@ async def database_operations(client):
# Get table schema # Get table schema
schema = await client.get_table_schema("table_name", "db_name") schema = await client.get_table_schema("table_name", "db_name")
# Column data analysis
analysis = await client.analyze_column("table", "column", "basic")
``` ```
## 🧪 Testing ## 🧪 Testing
@@ -177,7 +174,6 @@ python test_unified_client.py benchmark
2. get_table_list: Get table list for specified database 2. get_table_list: Get table list for specified database
3. get_table_schema: Get table structure information 3. get_table_schema: Get table structure information
4. exec_query: Execute SQL query 4. exec_query: Execute SQL query
5. column_analysis: Analyze column data distribution and statistics
... ...
🧪 Testing basic functionality... 🧪 Testing basic functionality...
@@ -189,8 +185,6 @@ python test_unified_client.py benchmark
✅ SSB query successful ✅ SSB query successful
4⃣ Getting table structure... 4⃣ Getting table structure...
✅ Table structure retrieved successfully ✅ Table structure retrieved successfully
5⃣ Column data analysis...
✅ Column analysis successful
✅ HTTP mode testing completed! ✅ HTTP mode testing completed!
``` ```
@@ -256,12 +250,6 @@ async def comprehensive_example():
schema_result = await client.get_table_schema("lineorder", "ssb") schema_result = await client.get_table_schema("lineorder", "ssb")
print(f"Table schema: {schema_result}") print(f"Table schema: {schema_result}")
# Column analysis
analysis_result = await client.analyze_column(
"lineorder", "lo_orderkey", "basic"
)
print(f"Column analysis: {analysis_result}")
await client.connect_and_run(demo_operations) await client.connect_and_run(demo_operations)
# Run the example # Run the example

View File

@@ -422,18 +422,14 @@ class DorisUnifiedClient:
return await self.call_tool(tool_name, kwargs) return await self.call_tool(tool_name, kwargs)
async def analyze_column(self, table_name: str, column_name: str, analysis_type: str = "basic", **kwargs) -> dict[str, Any]: async def get_memory_stats(self, tracker_type: str = "overview", include_details: bool = True, **kwargs) -> dict[str, Any]:
"""Analyze column""" """Get memory statistics"""
tool_name = await self._find_tool_by_pattern(["column_analysis", "analyze_column", "column"]) tool_name = await self._find_tool_by_pattern(["memory", "realtime_memory"])
if not tool_name: if not tool_name:
return {"success": False, "error": "Column analysis tool not found"} return {"success": False, "error": "Memory stats tool not found"}
arguments = { arguments = {"tracker_type": tracker_type, "include_details": include_details}
"table_name": table_name, arguments.update(kwargs)
"column_name": column_name,
"analysis_type": analysis_type,
**kwargs
}
return await self.call_tool(tool_name, arguments) return await self.call_tool(tool_name, arguments)
async def call_tool_by_function(self, function_description: str, arguments: dict[str, Any]) -> dict[str, Any]: async def call_tool_by_function(self, function_description: str, arguments: dict[str, Any]) -> dict[str, Any]:

View File

@@ -28,6 +28,17 @@ import json
import logging import logging
from typing import Any from typing import Any
# MCP version compatibility check.
# Resolve the installed MCP version once at import time so it can be logged
# and inspected by the compatibility code further down.
logger = logging.getLogger(__name__)
try:
    from importlib.metadata import version as _distribution_version

    import mcp

    # Prefer the module attribute when present; otherwise fall back to the
    # installed distribution metadata, because the package is not guaranteed
    # to expose ``__version__``.
    MCP_VERSION = getattr(mcp, '__version__', None) or _distribution_version('mcp')
    logger.info(f"Using MCP version: {MCP_VERSION}")
except Exception as e:
    # Best effort only: a missing package or missing metadata must not
    # prevent this module from being imported.
    logger.warning(f"Could not determine MCP version: {e}")
    MCP_VERSION = 'unknown'
from mcp.server import Server from mcp.server import Server
from mcp.server.models import InitializationOptions from mcp.server.models import InitializationOptions
@@ -44,11 +55,15 @@ from .tools.resources_manager import DorisResourcesManager
from .utils.config import DorisConfig from .utils.config import DorisConfig
from .utils.db import DorisConnectionManager from .utils.db import DorisConnectionManager
from .utils.security import DorisSecurityManager from .utils.security import DorisSecurityManager
import os
# Configure logging # Configure logging
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Create a default config instance for getting default values
_default_config = DorisConfig()
class DorisServer: class DorisServer:
"""Apache Doris MCP Server main class""" """Apache Doris MCP Server main class"""
@@ -71,6 +86,47 @@ class DorisServer:
self.logger = logging.getLogger(f"{__name__}.DorisServer") self.logger = logging.getLogger(f"{__name__}.DorisServer")
self._setup_handlers() self._setup_handlers()
def _get_mcp_capabilities(self):
"""Get MCP capabilities with version compatibility"""
try:
# For MCP 1.9.x and newer
from mcp.server.lowlevel.server import NotificationOptions
return self.server.get_capabilities(
notification_options=NotificationOptions(
prompts_changed=True,
resources_changed=True,
tools_changed=True
),
experimental_capabilities={}
)
except TypeError:
try:
# For MCP 1.8.x
from mcp.server.lowlevel.server import NotificationOptions
return self.server.get_capabilities(
notification_options=NotificationOptions(
prompts_changed=True,
resources_changed=True,
tools_changed=True
),
experimental_capabilities={}
)
except Exception as e:
self.logger.warning(f"Could not get capabilities with NotificationOptions: {e}")
# Fallback for older versions
try:
return self.server.get_capabilities()
except Exception as fallback_e:
self.logger.error(f"Failed to get capabilities: {fallback_e}")
# Return minimal capabilities
return {
"resources": {},
"tools": {},
"prompts": {}
}
def _setup_handlers(self): def _setup_handlers(self):
"""Setup MCP protocol handlers""" """Setup MCP protocol handlers"""
@@ -189,22 +245,12 @@ class DorisServer:
read_stream, write_stream = streams read_stream, write_stream = streams
self.logger.info("stdio_server streams created successfully") self.logger.info("stdio_server streams created successfully")
# Create initialization options # Create initialization options with version compatibility
# MCP 1.8.0 requires parameters for get_capabilities capabilities = self._get_mcp_capabilities()
from mcp.server.lowlevel.server import NotificationOptions
capabilities = self.server.get_capabilities(
notification_options=NotificationOptions(
prompts_changed=True,
resources_changed=True,
tools_changed=True
),
experimental_capabilities={}
)
init_options = InitializationOptions( init_options = InitializationOptions(
server_name="doris-mcp-server", server_name="doris-mcp-server",
server_version="1.0.0", server_version=os.getenv("SERVER_VERSION", _default_config.server_version),
capabilities=capabilities, capabilities=capabilities,
) )
self.logger.info("Initialization options created successfully") self.logger.info("Initialization options created successfully")
@@ -237,7 +283,7 @@ class DorisServer:
async def start_http(self, host: str = "localhost", port: int = 3000): async def start_http(self, host: str = os.getenv("SERVER_HOST", _default_config.database.host), port: int = os.getenv("SERVER_PORT", _default_config.server_port)):
"""Start Streamable HTTP transport mode""" """Start Streamable HTTP transport mode"""
self.logger.info(f"Starting Doris MCP Server (Streamable HTTP mode) - {host}:{port}") self.logger.info(f"Starting Doris MCP Server (Streamable HTTP mode) - {host}:{port}")
@@ -251,9 +297,9 @@ class DorisServer:
from collections.abc import AsyncIterator from collections.abc import AsyncIterator
from mcp.server.streamable_http_manager import StreamableHTTPSessionManager from mcp.server.streamable_http_manager import StreamableHTTPSessionManager
from starlette.applications import Starlette from starlette.applications import Starlette
from starlette.routing import Mount, Route from starlette.routing import Route
from starlette.responses import JSONResponse, Response from starlette.responses import JSONResponse, Response
from starlette.types import Receive, Scope, Send from starlette.types import Scope
# Create session manager # Create session manager
session_manager = StreamableHTTPSessionManager( session_manager = StreamableHTTPSessionManager(
@@ -413,34 +459,34 @@ Examples:
"--transport", "--transport",
type=str, type=str,
choices=["stdio", "http"], choices=["stdio", "http"],
default="stdio", default=os.getenv("TRANSPORT", _default_config.transport),
help="Transport protocol type: stdio (local), http (Streamable HTTP)", help=f"Transport protocol type: stdio (local), http (Streamable HTTP) (default: {_default_config.transport})",
) )
parser.add_argument( parser.add_argument(
"--host", "--host",
type=str, type=str,
default="localhost", default=os.getenv("SERVER_HOST", _default_config.database.host),
help="Host address for HTTP mode (default: localhost)", help=f"Host address for HTTP mode (default: {_default_config.database.host})",
) )
parser.add_argument( parser.add_argument(
"--port", type=int, default=3000, help="Port number for HTTP mode (default: 3000)" "--port", type=int, default=os.getenv("SERVER_PORT", _default_config.server_port), help=f"Port number for HTTP mode (default: {_default_config.server_port})"
) )
parser.add_argument( parser.add_argument(
"--db-host", "--db-host",
type=str, type=str,
default="localhost", default=os.getenv("DB_HOST", _default_config.database.host),
help="Doris database host address (default: localhost)", help=f"Doris database host address (default: {_default_config.database.host})",
) )
parser.add_argument( parser.add_argument(
"--db-port", type=int, default=9030, help="Doris database port number (default: 9030)" "--db-port", type=int, default=os.getenv("DB_PORT", _default_config.database.port), help=f"Doris database port number (default: {_default_config.database.port})"
) )
parser.add_argument( parser.add_argument(
"--db-user", type=str, default="root", help="Doris database username (default: root)" "--db-user", type=str, default=os.getenv("DB_USER", _default_config.database.user), help=f"Doris database username (default: {_default_config.database.user})"
) )
parser.add_argument("--db-password", type=str, default="", help="Doris database password") parser.add_argument("--db-password", type=str, default="", help="Doris database password")
@@ -448,16 +494,16 @@ Examples:
parser.add_argument( parser.add_argument(
"--db-database", "--db-database",
type=str, type=str,
default="information_schema", default=os.getenv("DB_DATABASE", _default_config.database.database),
help="Doris database name (default: information_schema)", help=f"Doris database name (default: {_default_config.database.database})",
) )
parser.add_argument( parser.add_argument(
"--log-level", "--log-level",
type=str, type=str,
choices=["DEBUG", "INFO", "WARNING", "ERROR"], choices=["DEBUG", "INFO", "WARNING", "ERROR"],
default="INFO", default=os.getenv("LOG_LEVEL", _default_config.logging.level),
help="Log level (default: INFO)", help=f"Log level (default: {_default_config.logging.level})",
) )
return parser return parser
@@ -475,17 +521,17 @@ async def main():
config = DorisConfig.from_env() # First load from .env file and environment variables config = DorisConfig.from_env() # First load from .env file and environment variables
# Command line arguments override configuration (if provided) # Command line arguments override configuration (if provided)
if args.db_host != "localhost": # If not default value, use command line argument if args.db_host != _default_config.database.host: # If not default value, use command line argument
config.database.host = args.db_host config.database.host = args.db_host
if args.db_port != 9030: if args.db_port != _default_config.database.port:
config.database.port = args.db_port config.database.port = args.db_port
if args.db_user != "root": if args.db_user != _default_config.database.user:
config.database.user = args.db_user config.database.user = args.db_user
if args.db_password: # Use password if provided if args.db_password: # Use password if provided
config.database.password = args.db_password config.database.password = args.db_password
if args.db_database != "information_schema": if args.db_database != _default_config.database.database:
config.database.database = args.db_database config.database.database = args.db_database
if args.log_level != "INFO": if args.log_level != _default_config.logging.level:
config.logging.level = args.log_level config.logging.level = args.log_level
# Create server instance # Create server instance

View File

@@ -28,7 +28,8 @@ from mcp.types import Tool
from ..utils.db import DorisConnectionManager from ..utils.db import DorisConnectionManager
from ..utils.query_executor import DorisQueryExecutor from ..utils.query_executor import DorisQueryExecutor
from ..utils.analysis_tools import TableAnalyzer, PerformanceMonitor from ..utils.analysis_tools import TableAnalyzer, SQLAnalyzer, MemoryTracker
from ..utils.monitoring_tools import DorisMonitoringTools
from ..utils.schema_extractor import MetadataExtractor from ..utils.schema_extractor import MetadataExtractor
from ..utils.logger import get_logger from ..utils.logger import get_logger
@@ -45,8 +46,10 @@ class DorisToolsManager:
# Initialize business logic processors # Initialize business logic processors
self.query_executor = DorisQueryExecutor(connection_manager) self.query_executor = DorisQueryExecutor(connection_manager)
self.table_analyzer = TableAnalyzer(connection_manager) self.table_analyzer = TableAnalyzer(connection_manager)
self.performance_monitor = PerformanceMonitor(connection_manager) self.sql_analyzer = SQLAnalyzer(connection_manager)
self.metadata_extractor = MetadataExtractor(connection_manager=connection_manager) self.metadata_extractor = MetadataExtractor(connection_manager=connection_manager)
self.monitoring_tools = DorisMonitoringTools(connection_manager)
self.memory_tracker = MemoryTracker(connection_manager)
logger.info("DorisToolsManager initialized with business logic processors") logger.info("DorisToolsManager initialized with business logic processors")
@@ -54,99 +57,6 @@ class DorisToolsManager:
"""Register all tools to MCP server""" """Register all tools to MCP server"""
logger.info("Starting to register MCP tools") logger.info("Starting to register MCP tools")
# Column statistical analysis tool
@mcp.tool(
"column_analysis",
description="""[Function Description]: Analyze statistical information and data distribution of the specified column.
[Parameter Content]:
- table_name (string) [Required] - Name of the table to analyze
- column_name (string) [Required] - Name of the column to analyze
- analysis_type (string) [Optional] - Type of analysis to perform, default is "basic"
* "basic": Basic statistics (count, null values, distinct values)
* "distribution": Data distribution analysis (frequency, percentiles)
* "detailed": Comprehensive analysis including all above plus patterns and outliers
""",
inputSchema={
"type": "object",
"properties": {
"table_name": {"type": "string", "description": "Table name"},
"column_name": {
"type": "string",
"description": "Column name to analyze",
},
"analysis_type": {
"type": "string",
"enum": ["basic", "distribution", "detailed"],
"description": "Analysis type",
"default": "basic",
},
},
"required": ["table_name", "column_name"],
}
)
async def column_analysis_tool(
table_name: str,
column_name: str,
analysis_type: str = "basic"
) -> str:
"""Column statistical analysis tool"""
return await self.call_tool("column_analysis", {
"table_name": table_name,
"column_name": column_name,
"analysis_type": analysis_type
})
# Database performance monitoring tool
@mcp.tool(
"performance_stats[Experimental]",
description="""[Important]: This tool is experimental and may not be fully functional!
[Function Description]: Get database performance statistics information.
[Parameter Content]:
- metric_type (string) [Optional] - Type of performance metrics to retrieve, default is "queries"
* "queries": Query performance metrics (execution time, frequency, etc.)
* "connections": Connection statistics (active connections, connection pool status)
* "tables": Table-level statistics (size, row count, access patterns)
* "system": System-level metrics (CPU, memory, disk usage)
- time_range (string) [Optional] - Time range for statistics, default is "1h"
* "1h": Last 1 hour
* "6h": Last 6 hours
* "24h": Last 24 hours
* "7d": Last 7 days
""",
inputSchema={
"type": "object",
"properties": {
"metric_type": {
"type": "string",
"enum": ["queries", "connections", "tables", "system"],
"description": "Performance metric type",
"default": "queries",
},
"time_range": {
"type": "string",
"enum": ["1h", "6h", "24h", "7d"],
"description": "Time range",
"default": "1h",
},
},
}
)
async def performance_stats_tool(
metric_type: str = "queries",
time_range: str = "1h"
) -> str:
"""Database performance monitoring tool"""
return await self.call_tool("performance_stats", {
"metric_type": metric_type,
"time_range": time_range
})
# SQL query execution tool (supports catalog federation queries) # SQL query execution tool (supports catalog federation queries)
@mcp.tool( @mcp.tool(
@@ -352,81 +262,227 @@ class DorisToolsManager:
"random_string": random_string "random_string": random_string
}) })
logger.info("Successfully registered 11 tools to MCP server (2 core tools + 9 migrated tools)") # SQL Explain tool
@mcp.tool(
    "get_sql_explain",
    description="""[Function Description]: Get SQL execution plan using EXPLAIN command based on Doris syntax.
[Parameter Content]:
- sql (string) [Required] - SQL statement to explain
- verbose (boolean) [Optional] - Whether to show verbose information, default is false
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
)
async def get_sql_explain_tool(
    sql: str,
    verbose: bool = False,
    db_name: str = None,
    catalog_name: str = None
) -> str:
    """Forward an EXPLAIN request to ``call_tool``.

    Args:
        sql: SQL statement to explain.
        verbose: Whether verbose plan output is requested.
        db_name: Optional target database name.
        catalog_name: Optional target catalog name.

    Returns:
        Whatever ``self.call_tool("get_sql_explain", ...)`` returns.
    """
    payload = {
        "sql": sql,
        "verbose": verbose,
        "db_name": db_name,
        "catalog_name": catalog_name,
    }
    return await self.call_tool("get_sql_explain", payload)
# MCP tool: SQL execution profile
@mcp.tool(
    "get_sql_profile",
    description="""[Function Description]: Get SQL execution profile by setting trace ID and fetching profile via FE HTTP API.
[Parameter Content]:
- sql (string) [Required] - SQL statement to profile
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
- timeout (integer) [Optional] - Query timeout in seconds, default is 30
""",
)
async def get_sql_profile_tool(
    sql: str,
    db_name: str = None,
    catalog_name: str = None,
    timeout: int = 30
) -> str:
    """Forward a SQL profile request to ``call_tool``.

    Args:
        sql: SQL statement to profile.
        db_name: Optional target database name.
        catalog_name: Optional target catalog name.
        timeout: Query timeout in seconds (defaults to 30).

    Returns:
        Whatever ``self.call_tool("get_sql_profile", ...)`` returns.
    """
    payload = {
        "sql": sql,
        "db_name": db_name,
        "catalog_name": catalog_name,
        "timeout": timeout,
    }
    return await self.call_tool("get_sql_profile", payload)
# MCP tool: table data size
@mcp.tool(
    "get_table_data_size",
    description="""[Function Description]: Get table data size information via FE HTTP API.
[Parameter Content]:
- db_name (string) [Optional] - Database name, if not specified returns all databases
- table_name (string) [Optional] - Table name, if not specified returns all tables in the database
- single_replica (boolean) [Optional] - Whether to get single replica data size, default is false
""",
)
async def get_table_data_size_tool(
    db_name: str = None,
    table_name: str = None,
    single_replica: bool = False
) -> str:
    """Forward a table data size request to ``call_tool``.

    Args:
        db_name: Optional database name.
        table_name: Optional table name.
        single_replica: Whether single-replica size is requested.

    Returns:
        Whatever ``self.call_tool("get_table_data_size", ...)`` returns.
    """
    payload = {
        "db_name": db_name,
        "table_name": table_name,
        "single_replica": single_replica,
    }
    return await self.call_tool("get_table_data_size", payload)
# MCP tool: monitoring metric definitions
@mcp.tool(
    "get_monitoring_metrics_info",
    description="""[Function Description]: Get Doris monitoring metrics definitions and descriptions without executing queries.
[Parameter Content]:
- role (string) [Optional] - Node role to get metric definitions for, default is "all"
* "fe": Only FE metrics definitions
* "be": Only BE metrics definitions
* "all": Both FE and BE metrics definitions
- monitor_type (string) [Optional] - Type of monitoring metrics, default is "all"
* "process": Process monitoring metrics
* "jvm": JVM monitoring metrics (FE only)
* "machine": Machine monitoring metrics
* "all": All monitoring types
- priority (string) [Optional] - Metric priority level, default is "core"
* "core": Only core essential metrics (10-12 items for production use)
* "p0": Only P0 (highest priority) metrics definitions
* "all": All metrics definitions (P0 and non-P0)
""",
)
async def get_monitoring_metrics_info_tool(
    role: str = "all",
    monitor_type: str = "all",
    priority: str = "core"
) -> str:
    """Forward a metric-definitions request to ``call_tool``.

    Args:
        role: Node role filter (defaults to "all").
        monitor_type: Monitoring category filter (defaults to "all").
        priority: Metric priority level (defaults to "core").

    Returns:
        Whatever ``self.call_tool("get_monitoring_metrics_info", ...)`` returns.
    """
    payload = {
        "role": role,
        "monitor_type": monitor_type,
        "priority": priority,
    }
    return await self.call_tool("get_monitoring_metrics_info", payload)
# MCP tool: monitoring metric data
@mcp.tool(
    "get_monitoring_metrics_data",
    description="""[Function Description]: Get actual Doris monitoring metrics data from FE and BE nodes via HTTP API.
[Parameter Content]:
- role (string) [Optional] - Node role to monitor, default is "all"
* "fe": Only FE nodes
* "be": Only BE nodes
* "all": Both FE and BE nodes
- monitor_type (string) [Optional] - Type of monitoring metrics, default is "all"
* "process": Process monitoring metrics
* "jvm": JVM monitoring metrics (FE only)
* "machine": Machine monitoring metrics
* "all": All monitoring types
- priority (string) [Optional] - Metric priority level, default is "core"
* "core": Only core essential metrics (10-12 items for production use)
* "p0": Only P0 (highest priority) metrics
* "all": All metrics (P0 and non-P0)
- include_raw_metrics (boolean) [Optional] - Whether to include raw detailed metrics data (can be very large)
""",
)
async def get_monitoring_metrics_data_tool(
    role: str = "all",
    monitor_type: str = "all",
    priority: str = "core",
    include_raw_metrics: bool = False
) -> str:
    """Forward a metric-data request to ``call_tool``.

    Args:
        role: Node role filter (defaults to "all").
        monitor_type: Monitoring category filter (defaults to "all").
        priority: Metric priority level (defaults to "core").
        include_raw_metrics: Whether raw metrics should be included.

    Returns:
        Whatever ``self.call_tool("get_monitoring_metrics_data", ...)`` returns.
    """
    payload = {
        "role": role,
        "monitor_type": monitor_type,
        "priority": priority,
        "include_raw_metrics": include_raw_metrics,
    }
    return await self.call_tool("get_monitoring_metrics_data", payload)
# MCP tool: real-time memory tracker statistics
@mcp.tool(
    "get_realtime_memory_stats",
    description="""[Function Description]: Get real-time memory statistics via Doris BE Memory Tracker web interface.
[Parameter Content]:
- tracker_type (string) [Optional] - Type of memory trackers to retrieve, default is "overview"
* "overview": Overview type trackers (process memory, tracked memory summary)
* "global": Global shared memory trackers (cache, metadata)
* "query": Query-related memory trackers
* "load": Load-related memory trackers
* "compaction": Compaction-related memory trackers
* "all": All memory tracker types
- include_details (boolean) [Optional] - Whether to include detailed tracker information and definitions, default is true
""",
)
async def get_realtime_memory_stats_tool(
    tracker_type: str = "overview",
    include_details: bool = True
) -> str:
    """Forward a real-time memory statistics request to ``call_tool``.

    Args:
        tracker_type: Tracker category to retrieve (defaults to "overview").
        include_details: Whether detailed tracker information is requested.

    Returns:
        Whatever ``self.call_tool("get_realtime_memory_stats", ...)`` returns.
    """
    payload = {
        "tracker_type": tracker_type,
        "include_details": include_details,
    }
    return await self.call_tool("get_realtime_memory_stats", payload)
# MCP tool: historical memory tracker statistics
@mcp.tool(
    "get_historical_memory_stats",
    description="""[Function Description]: Get historical memory statistics via Doris BE Bvar interface.
[Parameter Content]:
- tracker_names (array) [Optional] - List of specific tracker names to query, if not specified will get common trackers
* Example: ["process_resident_memory", "global", "query", "load", "compaction"]
- time_range (string) [Optional] - Time range for historical data, default is "1h"
* "1h": Last 1 hour
* "6h": Last 6 hours
* "24h": Last 24 hours
""",
)
async def get_historical_memory_stats_tool(
    tracker_names: List[str] = None,
    time_range: str = "1h"
) -> str:
    """Forward a historical memory statistics request to ``call_tool``.

    Args:
        tracker_names: Optional list of tracker names to query.
        time_range: Historical window (defaults to "1h").

    Returns:
        Whatever ``self.call_tool("get_historical_memory_stats", ...)`` returns.
    """
    payload = {
        "tracker_names": tracker_names,
        "time_range": time_range,
    }
    return await self.call_tool("get_historical_memory_stats", payload)
logger.info("Successfully registered 16 tools to MCP server")
async def list_tools(self) -> List[Tool]: async def list_tools(self) -> List[Tool]:
"""List all available query tools (for stdio mode)""" """List all available query tools (for stdio mode)"""
tools = [ tools = [
Tool(
name="column_analysis[Experimental]",
description="""[Important]: This tool is experimental and may not be fully functional!
[Function Description]: Analyze statistical information and data distribution of the specified column.
[Parameter Content]:
- table_name (string) [Required] - Name of the table to analyze
- column_name (string) [Required] - Name of the column to analyze
- analysis_type (string) [Optional] - Type of analysis to perform, default is "basic"
* "basic": Basic statistics (count, null values, distinct values)
* "distribution": Data distribution analysis (frequency, percentiles)
* "detailed": Comprehensive analysis including all above plus patterns and outliers
""",
inputSchema={
"type": "object",
"properties": {
"table_name": {"type": "string", "description": "Table name"},
"column_name": {
"type": "string",
"description": "Column name to analyze",
},
"analysis_type": {
"type": "string",
"enum": ["basic", "distribution", "detailed"],
"description": "Analysis type",
"default": "basic",
},
},
"required": ["table_name", "column_name"],
},
),
Tool(
name="performance_stats",
description="""[Function Description]: Get database performance statistics information.
[Parameter Content]:
- metric_type (string) [Optional] - Type of performance metrics to retrieve, default is "queries"
* "queries": Query performance metrics (execution time, frequency, etc.)
* "connections": Connection statistics (active connections, connection pool status)
* "tables": Table-level statistics (size, row count, access patterns)
* "system": System-level metrics (CPU, memory, disk usage)
- time_range (string) [Optional] - Time range for statistics, default is "1h"
* "1h": Last 1 hour
* "6h": Last 6 hours
* "24h": Last 24 hours
* "7d": Last 7 days
""",
inputSchema={
"type": "object",
"properties": {
"metric_type": {
"type": "string",
"enum": ["queries", "connections", "tables", "system"],
"description": "Performance metric type",
"default": "queries",
},
"time_range": {
"type": "string",
"enum": ["1h", "6h", "24h", "7d"],
"description": "Time range",
"default": "1h",
},
},
},
),
Tool( Tool(
name="exec_query", name="exec_query",
description="""[Function Description]: Execute SQL query and return result command with catalog federation support. description="""[Function Description]: Execute SQL query and return result command with catalog federation support.
@@ -610,6 +666,188 @@ class DorisToolsManager:
"required": ["random_string"], "required": ["random_string"],
}, },
), ),
Tool(
name="get_sql_explain",
description="""[Function Description]: Get SQL execution plan using EXPLAIN command based on Doris syntax.
[Parameter Content]:
- sql (string) [Required] - SQL statement to explain
- verbose (boolean) [Optional] - Whether to show verbose information, default is false
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
""",
inputSchema={
"type": "object",
"properties": {
"sql": {"type": "string", "description": "SQL statement to explain"},
"verbose": {"type": "boolean", "description": "Whether to show verbose information", "default": False},
"db_name": {"type": "string", "description": "Database name"},
"catalog_name": {"type": "string", "description": "Catalog name"},
},
"required": ["sql"],
},
),
Tool(
name="get_sql_profile",
description="""[Function Description]: Get SQL execution profile by setting trace ID and fetching profile via FE HTTP API.
[Parameter Content]:
- sql (string) [Required] - SQL statement to profile
- db_name (string) [Optional] - Target database name, defaults to the current database
- catalog_name (string) [Optional] - Target catalog name for federation queries, defaults to current catalog
- timeout (integer) [Optional] - Query timeout in seconds, default is 30
""",
inputSchema={
"type": "object",
"properties": {
"sql": {"type": "string", "description": "SQL statement to profile"},
"db_name": {"type": "string", "description": "Database name"},
"catalog_name": {"type": "string", "description": "Catalog name"},
"timeout": {"type": "integer", "description": "Query timeout in seconds", "default": 30},
},
"required": ["sql"],
},
),
Tool(
name="get_table_data_size",
description="""[Function Description]: Get table data size information via FE HTTP API.
[Parameter Content]:
- db_name (string) [Optional] - Database name, if not specified returns all databases
- table_name (string) [Optional] - Table name, if not specified returns all tables in the database
- single_replica (boolean) [Optional] - Whether to get single replica data size, default is false
""",
inputSchema={
"type": "object",
"properties": {
"db_name": {"type": "string", "description": "Database name"},
"table_name": {"type": "string", "description": "Table name"},
"single_replica": {"type": "boolean", "description": "Whether to get single replica data size", "default": False},
},
},
),
Tool(
name="get_monitoring_metrics_info",
description="""[Function Description]: Get Doris monitoring metrics definitions and descriptions without executing queries.
[Parameter Content]:
- role (string) [Optional] - Node role to get metric definitions for, default is "all"
* "fe": Only FE metrics definitions
* "be": Only BE metrics definitions
* "all": Both FE and BE metrics definitions
- monitor_type (string) [Optional] - Type of monitoring metrics, default is "all"
* "process": Process monitoring metrics
* "jvm": JVM monitoring metrics (FE only)
* "machine": Machine monitoring metrics
* "all": All monitoring types
- priority (string) [Optional] - Metric priority level, default is "core"
* "core": Only core essential metrics (10-12 items for production use)
* "p0": Only P0 (highest priority) metrics definitions
* "all": All metrics definitions (P0 and non-P0)
""",
inputSchema={
"type": "object",
"properties": {
"role": {"type": "string", "enum": ["fe", "be", "all"], "description": "Node role to get metric definitions for", "default": "all"},
"monitor_type": {"type": "string", "enum": ["process", "jvm", "machine", "all"], "description": "Type of monitoring metrics", "default": "all"},
"priority": {"type": "string", "enum": ["core", "p0", "all"], "description": "Metric priority level", "default": "core"},
},
},
),
Tool(
name="get_monitoring_metrics_data",
description="""[Function Description]: Get actual Doris monitoring metrics data from FE and BE nodes via HTTP API.
[Parameter Content]:
- role (string) [Optional] - Node role to monitor, default is "all"
* "fe": Only FE nodes
* "be": Only BE nodes
* "all": Both FE and BE nodes
- monitor_type (string) [Optional] - Type of monitoring metrics, default is "all"
* "process": Process monitoring metrics
* "jvm": JVM monitoring metrics (FE only)
* "machine": Machine monitoring metrics
* "all": All monitoring types
- priority (string) [Optional] - Metric priority level, default is "core"
* "core": Only core essential metrics (10-12 items for production use)
* "p0": Only P0 (highest priority) metrics
* "all": All metrics (P0 and non-P0)
- include_raw_metrics (boolean) [Optional] - Whether to include raw detailed metrics data (can be very large)
""",
inputSchema={
"type": "object",
"properties": {
"role": {"type": "string", "enum": ["fe", "be", "all"], "description": "Node role to monitor", "default": "all"},
"monitor_type": {"type": "string", "enum": ["process", "jvm", "machine", "all"], "description": "Type of monitoring metrics", "default": "all"},
"priority": {"type": "string", "enum": ["core", "p0", "all"], "description": "Metric priority level", "default": "core"},
"include_raw_metrics": {"type": "boolean", "description": "Whether to include raw detailed metrics data (can be very large)", "default": False},
},
},
),
Tool(
name="get_realtime_memory_stats",
description="""[Function Description]: Get real-time memory statistics via Doris BE Memory Tracker web interface.
[Parameter Content]:
- tracker_type (string) [Optional] - Type of memory trackers to retrieve, default is "overview"
* "overview": Overview type trackers (process memory, tracked memory summary)
* "global": Global shared memory trackers (cache, metadata)
* "query": Query-related memory trackers
* "load": Load-related memory trackers
* "compaction": Compaction-related memory trackers
* "all": All memory tracker types
- include_details (boolean) [Optional] - Whether to include detailed tracker information and definitions, default is true
""",
inputSchema={
"type": "object",
"properties": {
"tracker_type": {"type": "string", "enum": ["overview", "global", "query", "load", "compaction", "all"], "description": "Type of memory trackers to retrieve", "default": "overview"},
"include_details": {"type": "boolean", "description": "Whether to include detailed tracker information and definitions", "default": True},
},
},
),
Tool(
name="get_historical_memory_stats",
description="""[Function Description]: Get historical memory statistics via Doris BE Bvar interface.
[Parameter Content]:
- tracker_names (array) [Optional] - List of specific tracker names to query, if not specified will get common trackers
* Example: ["process_resident_memory", "global", "query", "load", "compaction"]
- time_range (string) [Optional] - Time range for historical data, default is "1h"
* "1h": Last 1 hour
* "6h": Last 6 hours
* "24h": Last 24 hours
""",
inputSchema={
"type": "object",
"properties": {
"tracker_names": {"type": "array", "items": {"type": "string"}, "description": "List of specific tracker names to query"},
"time_range": {"type": "string", "enum": ["1h", "6h", "24h"], "description": "Time range for historical data", "default": "1h"},
},
},
),
] ]
return tools return tools
@@ -622,12 +860,7 @@ class DorisToolsManager:
start_time = time.time() start_time = time.time()
# Tool routing - dispatch requests to corresponding business logic processors # Tool routing - dispatch requests to corresponding business logic processors
if name == "column_analysis": if name == "exec_query":
result = await self._column_analysis_tool(arguments)
elif name == "performance_stats":
result = await self._performance_stats_tool(arguments)
# ===== 9 tool routes migrated from source project =====
elif name == "exec_query":
result = await self._exec_query_tool(arguments) result = await self._exec_query_tool(arguments)
elif name == "get_table_schema": elif name == "get_table_schema":
result = await self._get_table_schema_tool(arguments) result = await self._get_table_schema_tool(arguments)
@@ -645,6 +878,20 @@ class DorisToolsManager:
result = await self._get_recent_audit_logs_tool(arguments) result = await self._get_recent_audit_logs_tool(arguments)
elif name == "get_catalog_list": elif name == "get_catalog_list":
result = await self._get_catalog_list_tool(arguments) result = await self._get_catalog_list_tool(arguments)
elif name == "get_sql_explain":
result = await self._get_sql_explain_tool(arguments)
elif name == "get_sql_profile":
result = await self._get_sql_profile_tool(arguments)
elif name == "get_table_data_size":
result = await self._get_table_data_size_tool(arguments)
elif name == "get_monitoring_metrics_info":
result = await self._get_monitoring_metrics_info_tool(arguments)
elif name == "get_monitoring_metrics_data":
result = await self._get_monitoring_metrics_data_tool(arguments)
elif name == "get_realtime_memory_stats":
result = await self._get_realtime_memory_stats_tool(arguments)
elif name == "get_historical_memory_stats":
result = await self._get_historical_memory_stats_tool(arguments)
else: else:
raise ValueError(f"Unknown tool: {name}") raise ValueError(f"Unknown tool: {name}")
@@ -670,28 +917,6 @@ class DorisToolsManager:
} }
return json.dumps(error_result, ensure_ascii=False, indent=2) return json.dumps(error_result, ensure_ascii=False, indent=2)
# The following are tool routing methods, responsible for calling corresponding business logic processors
async def _column_analysis_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Column statistical analysis tool routing"""
table_name = arguments.get("table_name")
column_name = arguments.get("column_name")
analysis_type = arguments.get("analysis_type", "basic")
# Delegate to table analyzer for processing
return await self.table_analyzer.analyze_column(
table_name, column_name, analysis_type
)
async def _performance_stats_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Database performance statistics tool routing"""
metric_type = arguments.get("metric_type", "queries")
time_range = arguments.get("time_range", "1h")
# Delegate to performance monitor for processing
return await self.performance_monitor.get_performance_stats(
metric_type, time_range
)
async def _exec_query_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]: async def _exec_query_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""SQL query execution tool routing (supports federation queries)""" """SQL query execution tool routing (supports federation queries)"""
@@ -780,3 +1005,81 @@ class DorisToolsManager:
# Delegate to metadata extractor for processing # Delegate to metadata extractor for processing
return await self.metadata_extractor.get_catalog_list_for_mcp() return await self.metadata_extractor.get_catalog_list_for_mcp()
async def _get_sql_explain_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""SQL Explain tool routing"""
sql = arguments.get("sql")
verbose = arguments.get("verbose", False)
db_name = arguments.get("db_name")
catalog_name = arguments.get("catalog_name")
# Delegate to SQL analyzer for processing
return await self.sql_analyzer.get_sql_explain(
sql, verbose, db_name, catalog_name
)
async def _get_sql_profile_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""SQL Profile tool routing"""
sql = arguments.get("sql")
db_name = arguments.get("db_name")
catalog_name = arguments.get("catalog_name")
timeout = arguments.get("timeout", 30)
# Delegate to SQL analyzer for processing
return await self.sql_analyzer.get_sql_profile(
sql, db_name, catalog_name, timeout
)
async def _get_table_data_size_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Table data size tool routing"""
db_name = arguments.get("db_name")
table_name = arguments.get("table_name")
single_replica = arguments.get("single_replica", False)
# Delegate to SQL analyzer for processing
return await self.sql_analyzer.get_table_data_size(
db_name, table_name, single_replica
)
async def _get_monitoring_metrics_info_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Monitoring metrics info tool routing"""
role = arguments.get("role", "all")
monitor_type = arguments.get("monitor_type", "all")
priority = arguments.get("priority", "p0")
# Delegate to monitoring tools for processing (info_only=True)
return await self.monitoring_tools.get_monitoring_metrics(
role, monitor_type, priority, info_only=True, format_type="prometheus"
)
async def _get_monitoring_metrics_data_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Monitoring metrics data tool routing"""
role = arguments.get("role", "all")
monitor_type = arguments.get("monitor_type", "all")
priority = arguments.get("priority", "p0")
include_raw_metrics = arguments.get("include_raw_metrics", False)
# Delegate to monitoring tools for processing (info_only=False)
return await self.monitoring_tools.get_monitoring_metrics(
role, monitor_type, priority, info_only=False, format_type="prometheus", include_raw_metrics=include_raw_metrics
)
async def _get_realtime_memory_stats_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Real-time memory statistics tool routing"""
tracker_type = arguments.get("tracker_type", "overview")
include_details = arguments.get("include_details", True)
# Delegate to memory tracker for processing
return await self.memory_tracker.get_realtime_memory_stats(
tracker_type, include_details
)
async def _get_historical_memory_stats_tool(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
"""Historical memory statistics tool routing"""
tracker_names = arguments.get("tracker_names")
time_range = arguments.get("time_range", "1h")
# Delegate to memory tracker for processing
return await self.memory_tracker.get_historical_memory_stats(
tracker_names, time_range
)

View File

@@ -22,6 +22,10 @@ Provides data analysis functions including table analysis, column statistics, pe
import time import time
from datetime import datetime from datetime import datetime
from typing import Any, Dict, List from typing import Any, Dict, List
import uuid
import aiohttp
import hashlib
from pathlib import Path
from .db import DorisConnectionManager from .db import DorisConnectionManager
from .logger import get_logger from .logger import get_logger
@@ -332,3 +336,905 @@ class PerformanceMonitor:
"order_by": order_by, "order_by": order_by,
"note": "Query history feature requires audit log configuration" "note": "Query history feature requires audit log configuration"
} }
class SQLAnalyzer:
"""SQL analyzer for EXPLAIN and PROFILE operations"""
def __init__(self, connection_manager: DorisConnectionManager):
    """Store the connection manager used by the analyzer's query methods.

    Args:
        connection_manager: Connection manager through which queries are
            executed and whose ``config`` is read for the temp-file
            directory and response size limit.
    """
    self.connection_manager = connection_manager
async def get_sql_explain(
    self,
    sql: str,
    verbose: bool = False,
    db_name: str = None,
    catalog_name: str = None
) -> Dict[str, Any]:
    """
    Get SQL execution plan using EXPLAIN command based on Doris syntax

    The formatted plan is written to ``explain_<timestamp>_<md5 prefix>.txt``
    under ``connection_manager.config.temp_files_dir`` and is also returned in
    the result, truncated to ``config.performance.max_response_content_size``
    characters when it is larger.

    Args:
        sql: SQL statement to explain (a trailing ``;`` is stripped)
        verbose: Whether to show verbose information (``EXPLAIN VERBOSE``)
        db_name: Target database name; applied with ``USE`` before explaining
        catalog_name: Target catalog name; applied together with ``db_name``
            via ``USE `catalog`.`db```, or alone via ``SWITCH `catalog```

    Returns:
        Dict containing explain plan file path, content, and basic info.
        On any failure (including an empty or non-string ``sql``) a dict with
        ``success`` False and an ``error`` message is returned instead of
        raising.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    def _preview(statement) -> str:
        # Tolerates non-string input so the error path can never raise itself.
        text = "" if statement is None else str(statement)
        return text[:100] + "..." if len(text) > 100 else text

    def _quote_identifier(name: str) -> str:
        # Backtick-quote every dot-separated part (so "catalog.db" keeps
        # working) and refuse embedded backticks rather than guess at escaping.
        parts = [part.strip().strip("`") for part in str(name).split(".")]
        if any(not part or "`" in part for part in parts):
            raise ValueError(f"Invalid identifier: {name!r}")
        return ".".join(f"`{part}`" for part in parts)

    if not isinstance(sql, str) or not sql.strip():
        return {
            "success": False,
            "error": "Failed to get SQL explain: SQL statement must be a non-empty string",
            "sql_preview": _preview(sql),
            "timestamp": time.strftime(time_format)
        }

    try:
        # Generate unique query ID for file naming
        query_hash = hashlib.md5(sql.encode()).hexdigest()[:8]
        query_id = f"{int(time.time())}_{query_hash}"

        # Ensure temp directory exists
        temp_dir = Path(self.connection_manager.config.temp_files_dir)
        temp_dir.mkdir(parents=True, exist_ok=True)
        explain_file = temp_dir / f"explain_{query_id}.txt"

        logger.info(f"Generating SQL explain for query ID: {query_id}")

        # Switch catalog/database if specified.
        # NOTE(review): this assumes statements issued under the
        # "explain_session" id share session state — confirm against
        # DorisConnectionManager.execute_query.
        context_sql = None
        if catalog_name and db_name:
            context_sql = f"USE {_quote_identifier(catalog_name)}.{_quote_identifier(db_name)}"
        elif catalog_name:
            context_sql = f"SWITCH {_quote_identifier(catalog_name)}"
        elif db_name:
            context_sql = f"USE {_quote_identifier(db_name)}"
        if context_sql:
            await self.connection_manager.execute_query("explain_session", context_sql)

        # Construct and execute the EXPLAIN query
        explain_type = "EXPLAIN VERBOSE" if verbose else "EXPLAIN"
        explain_sql = f"{explain_type} {sql.strip().rstrip(';')}"
        logger.info(f"Executing explain query: {explain_sql}")
        result = await self.connection_manager.execute_query("explain_session", explain_sql)
        plan_rows = result.data if result and result.data else []

        # Format explain output
        explain_content = [
            "=== SQL EXPLAIN PLAN ===",
            f"Query ID: {query_id}",
            f"Timestamp: {time.strftime(time_format)}",
            f"Database: {db_name or 'current'}",
            f"Catalog: {catalog_name or 'current'}",
            f"Verbose: {verbose}",
            "",
            "=== ORIGINAL SQL ===",
            sql,
            "",
            "=== EXPLAIN QUERY ===",
            explain_sql,
            "",
            "=== EXECUTION PLAN ===",
        ]
        if plan_rows:
            for row in plan_rows:
                if isinstance(row, dict):
                    # Handle dict format
                    explain_content.extend(f"{key}: {value}" for key, value in row.items())
                elif isinstance(row, (list, tuple)):
                    # Handle tuple/list format
                    explain_content.append(" | ".join(str(col) for col in row))
                else:
                    # Handle string format
                    explain_content.append(str(row))
        else:
            explain_content.append("No execution plan data returned")
        explain_content.extend([
            "",
            "=== METADATA ===",
            f"Execution time: {result.execution_time if result else 'N/A'} seconds",
            f"Rows returned: {len(plan_rows)}",
        ])

        full_content = '\n'.join(explain_content)

        # Write to file
        explain_file.write_text(full_content, encoding='utf-8')
        logger.info(f"Explain plan saved to: {explain_file.absolute()}")

        # Truncate the returned content if it exceeds the configured limit
        max_size = self.connection_manager.config.performance.max_response_content_size
        is_truncated = len(full_content) > max_size
        if is_truncated:
            truncated_content = full_content[:max_size] + "\n\n=== CONTENT TRUNCATED ===\n[Content is truncated due to size limit. Full content is saved to file.]"
        else:
            truncated_content = full_content

        return {
            "success": True,
            "query_id": query_id,
            "explain_file_path": str(explain_file.absolute()),
            "file_size_bytes": explain_file.stat().st_size,
            "content": truncated_content,
            "content_size": len(truncated_content),
            "is_content_truncated": is_truncated,
            "original_content_size": len(full_content),
            "sql_preview": _preview(sql),
            "verbose": verbose,
            "database": db_name,
            "catalog": catalog_name,
            "timestamp": time.strftime(time_format),
            "execution_time": result.execution_time if result else None,
            "plan_lines_count": len(plan_rows)
        }

    except Exception as e:
        logger.error(f"Failed to get SQL explain: {str(e)}")
        return {
            "success": False,
            "error": f"Failed to get SQL explain: {str(e)}",
            "sql_preview": _preview(sql),
            "timestamp": time.strftime(time_format)
        }
async def get_sql_profile(
self,
sql: str,
db_name: str = None,
catalog_name: str = None,
timeout: int = 30
) -> Dict[str, Any]:
"""
Get SQL execution profile by setting trace ID and fetching profile via HTTP API
Args:
sql: SQL statement to profile
db_name: Target database name
catalog_name: Target catalog name
timeout: Query timeout in seconds
Returns:
Dict containing profile file path, content, and basic info
"""
try:
# Generate unique trace ID and query ID for file naming
trace_id = str(uuid.uuid4())
import time
query_hash = hashlib.md5(sql.encode()).hexdigest()[:8]
timestamp = int(time.time())
file_query_id = f"{timestamp}_{query_hash}"
# Ensure temp directory exists
temp_dir = Path(self.connection_manager.config.temp_files_dir)
temp_dir.mkdir(parents=True, exist_ok=True)
# Create profile file path
profile_file = temp_dir / f"profile_{file_query_id}.txt"
logger.info(f"Generated trace ID for SQL profiling: {trace_id}")
logger.info(f"Profile will be saved to: {profile_file}")
connection = await self.connection_manager.get_connection("query")
try:
# Switch to specified database/catalog if provided
if catalog_name:
await connection.execute(f"USE `{catalog_name}`")
if db_name:
await connection.execute(f"USE `{db_name}`")
# Set trace ID for the session using session variable
# According to official docs: set session_context="trace_id:your_trace_id"
await connection.execute(f'set session_context="trace_id:{trace_id}"')
logger.info(f"Set trace ID: {trace_id}")
# Enable profile
await connection.execute(f'set enable_profile=true')
logger.info(f"Enabled profile")
# Execute the SQL statement
logger.info(f"Executing SQL with trace ID: {sql}")
start_time = time.time()
sql_result = await connection.execute(sql)
execution_time = time.time() - start_time
logger.info(f"SQL execution completed in {execution_time:.3f}s")
# Get query ID from trace ID via HTTP API
query_id = await self._get_query_id_by_trace_id(trace_id)
if not query_id:
return {
"success": False,
"error": "Failed to get query ID from trace ID",
"trace_id": trace_id,
"sql": sql,
"execution_time": execution_time
}
logger.info(f"Retrieved query ID: {query_id}")
# Get profile data
profile_data = await self._get_profile_by_query_id(query_id)
if not profile_data:
# Save error info to file
profile_content = [
f"=== SQL PROFILE RESULT ===",
f"File Query ID: {file_query_id}",
f"Trace ID: {trace_id}",
f"Query ID: {query_id}",
f"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}",
f"Database: {db_name or 'current'}",
f"Status: FAILED",
"",
"=== ORIGINAL SQL ===",
sql,
"",
"=== ERROR INFO ===",
"Failed to get profile data. This may be due to:",
"1) Profile data not generated yet",
"2) Query ID expired",
"3) Insufficient permissions to access profile data",
"",
"=== EXECUTION INFO ===",
f"Query execution: SUCCESSFUL",
f"Execution time: {execution_time:.3f} seconds",
f"Note: Query execution was successful, but profile data is not available"
]
# Get full content
full_profile_content = '\n'.join(profile_content)
with open(profile_file, 'w', encoding='utf-8') as f:
f.write(full_profile_content)
# Get max response size from config
max_size = self.connection_manager.config.performance.max_response_content_size
# Truncate content if needed
truncated_content = full_profile_content
is_truncated = False
if len(full_profile_content) > max_size:
truncated_content = full_profile_content[:max_size] + "\n\n=== CONTENT TRUNCATED ===\n[Content is truncated due to size limit. Full content is saved to file.]"
is_truncated = True
return {
"success": False,
"file_query_id": file_query_id,
"trace_id": trace_id,
"query_id": query_id,
"profile_file_path": str(profile_file.absolute()),
"file_size_bytes": profile_file.stat().st_size,
"content": truncated_content,
"content_size": len(truncated_content),
"is_content_truncated": is_truncated,
"original_content_size": len(full_profile_content),
"sql_preview": sql[:100] + "..." if len(sql) > 100 else sql,
"execution_time": execution_time,
"error": "Failed to get profile data",
"timestamp": time.strftime('%Y-%m-%d %H:%M:%S')
}
# Format profile output
profile_content = []
profile_content.append(f"=== SQL PROFILE RESULT ===")
profile_content.append(f"File Query ID: {file_query_id}")
profile_content.append(f"Trace ID: {trace_id}")
profile_content.append(f"Query ID: {query_id}")
profile_content.append(f"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")
profile_content.append(f"Database: {db_name or 'current'}")
profile_content.append(f"Status: SUCCESS")
profile_content.append("")
profile_content.append("=== ORIGINAL SQL ===")
profile_content.append(sql)
profile_content.append("")
profile_content.append("=== EXECUTION INFO ===")
profile_content.append(f"Execution time: {execution_time:.3f} seconds")
if hasattr(sql_result, 'data') and sql_result.data:
profile_content.append(f"Result rows: {len(sql_result.data)}")
if sql_result.data and sql_result.data[0]:
profile_content.append(f"Result columns: {list(sql_result.data[0].keys())}")
profile_content.append("")
profile_content.append("=== PROFILE DATA ===")
if isinstance(profile_data, dict):
import json
profile_content.append(json.dumps(profile_data, indent=2, ensure_ascii=False))
else:
profile_content.append(str(profile_data))
# Get full content
full_profile_content = '\n'.join(profile_content)
# Write to file
with open(profile_file, 'w', encoding='utf-8') as f:
f.write(full_profile_content)
logger.info(f"Profile data saved to: {profile_file.absolute()}")
# Get max response size from config
max_size = self.connection_manager.config.performance.max_response_content_size
# Truncate content if needed
truncated_content = full_profile_content
is_truncated = False
if len(full_profile_content) > max_size:
truncated_content = full_profile_content[:max_size] + "\n\n=== CONTENT TRUNCATED ===\n[Content is truncated due to size limit. Full content is saved to file.]"
is_truncated = True
return {
"success": True,
"file_query_id": file_query_id,
"trace_id": trace_id,
"query_id": query_id,
"profile_file_path": str(profile_file.absolute()),
"file_size_bytes": profile_file.stat().st_size,
"content": truncated_content,
"content_size": len(truncated_content),
"is_content_truncated": is_truncated,
"original_content_size": len(full_profile_content),
"sql_preview": sql[:100] + "..." if len(sql) > 100 else sql,
"database": db_name,
"catalog": catalog_name,
"execution_time": execution_time,
"sql_result_summary": {
"row_count": len(sql_result.data) if hasattr(sql_result, 'data') and sql_result.data else 0,
"columns": list(sql_result.data[0].keys()) if hasattr(sql_result, 'data') and sql_result.data and sql_result.data[0] else []
},
"timestamp": time.strftime('%Y-%m-%d %H:%M:%S')
}
except Exception as e:
logger.error(f"Error during SQL execution or profile retrieval: {str(e)}")
# Save error info to file
profile_content = [
f"=== SQL PROFILE RESULT ===",
f"File Query ID: {file_query_id}",
f"Trace ID: {trace_id}",
f"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}",
f"Database: {db_name or 'current'}",
f"Status: ERROR",
"",
"=== ORIGINAL SQL ===",
sql,
"",
"=== ERROR INFO ===",
f"SQL execution or profile retrieval failed: {str(e)}",
"",
"=== EXECUTION INFO ===",
"Query execution failed during profiling process"
]
# Get full content
full_profile_content = '\n'.join(profile_content)
with open(profile_file, 'w', encoding='utf-8') as f:
f.write(full_profile_content)
# Get max response size from config
max_size = self.connection_manager.config.performance.max_response_content_size
# Truncate content if needed
truncated_content = full_profile_content
is_truncated = False
if len(full_profile_content) > max_size:
truncated_content = full_profile_content[:max_size] + "\n\n=== CONTENT TRUNCATED ===\n[Content is truncated due to size limit. Full content is saved to file.]"
is_truncated = True
return {
"success": False,
"file_query_id": file_query_id,
"trace_id": trace_id,
"profile_file_path": str(profile_file.absolute()),
"file_size_bytes": profile_file.stat().st_size,
"content": truncated_content,
"content_size": len(truncated_content),
"is_content_truncated": is_truncated,
"original_content_size": len(full_profile_content),
"sql_preview": sql[:100] + "..." if len(sql) > 100 else sql,
"error": f"SQL execution or profile retrieval failed: {str(e)}",
"database": db_name,
"catalog": catalog_name,
"timestamp": time.strftime('%Y-%m-%d %H:%M:%S')
}
except Exception as e:
logger.error(f"SQL PROFILE failed: {str(e)}")
return {
"success": False,
"error": f"SQL PROFILE failed: {str(e)}",
"sql_preview": sql[:100] + "..." if len(sql) > 100 else sql,
"database": db_name,
"catalog": catalog_name,
"timestamp": time.strftime('%Y-%m-%d %H:%M:%S')
}
async def _get_query_id_by_trace_id(self, trace_id: str) -> str:
    """
    Get query ID by trace ID via FE HTTP API

    Sends a GET request to the FE endpoint
    ``/rest/v2/manager/query/trace_id/{trace_id}`` with HTTP Basic Auth built
    from the database configuration, then extracts the query ID from the
    response body.

    Args:
        trace_id: The trace ID set during query execution

    Returns:
        Query ID string, or None if the request fails, the API reports an
        error, or no query ID can be found in the response
    """
    # Function-scope imports, matching the local-import style used in this file
    import json
    import re
    from urllib.parse import quote

    try:
        # Get database config
        db_config = self.connection_manager.config.database

        # Build HTTP API URL according to official documentation
        # Reference: https://doris.apache.org/zh-CN/docs/admin-manual/open-api/fe-http/query-profile-action
        # Percent-encode the trace ID so characters such as "/", "?" or spaces
        # cannot alter the request path.
        encoded_trace_id = quote(str(trace_id), safe="")
        url = f"http://{db_config.host}:{db_config.fe_http_port}/rest/v2/manager/query/trace_id/{encoded_trace_id}"

        # HTTP Basic Auth
        auth = aiohttp.BasicAuth(db_config.user, db_config.password)
        request_timeout = aiohttp.ClientTimeout(total=10)

        logger.info(f"Requesting query ID from: {url}")

        async with aiohttp.ClientSession() as session:
            async with session.get(url, auth=auth, timeout=request_timeout) as response:
                if response.status != 200:
                    logger.error(f"HTTP request failed with status {response.status}")
                    response_text = await response.text()
                    logger.error(f"Response body: {response_text}")
                    return None

                # Content type is logged for diagnostics only; the body is
                # parsed as JSON regardless of what the header says.
                content_type = response.headers.get('content-type', '')
                response_text = await response.text()
                logger.info(f"Response content type: {content_type}")
                logger.info(f"Response body: {response_text}")

                if not response_text.strip():
                    return None

                try:
                    result = json.loads(response_text)
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to parse JSON response: {e}")
                    # Fallback: try to extract a UUID-shaped query ID using regex
                    query_id_pattern = r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'
                    matches = re.findall(query_id_pattern, response_text)
                    if matches:
                        query_id = matches[0]
                        logger.info(f"Extracted query ID from text: {query_id}")
                        return query_id
                    return None

                logger.info(f"Query ID API response: {result}")

                # A valid JSON body that is not an object (e.g. a list) cannot
                # carry the expected "code"/"data" fields.
                if not isinstance(result, dict):
                    logger.error(f"API returned error: {result}")
                    return None

                # Parse response according to Doris API format
                if result.get("code") == 0 and result.get("data"):
                    data = result["data"]
                    # Data can be either a string (query_id) or object with query_ids
                    if isinstance(data, str):
                        logger.info(f"Found query ID: {data}")
                        return data
                    elif isinstance(data, dict) and "query_ids" in data:
                        query_ids = data["query_ids"]
                        if query_ids:
                            query_id = query_ids[0]  # Take the first query ID
                            logger.info(f"Found query ID: {query_id}")
                            return query_id
                        else:
                            logger.warning("No query IDs found in response")
                else:
                    logger.error(f"API returned error: {result}")

        return None

    except Exception as e:
        logger.error(f"Failed to get query ID by trace ID: {str(e)}")
        return None
async def _get_profile_by_query_id(self, query_id: str) -> Dict[str, Any]:
    """
    Get profile data by query ID via FE HTTP API

    Tries two FE endpoints in order and returns the first usable profile:
    ``/rest/v2/manager/query/profile/text/{query_id}`` and then
    ``/api/profile/text?query_id={query_id}``. A failure on one endpoint
    (HTTP error, transport error, timeout, unparsable body) is logged and the
    next endpoint is tried.

    Args:
        query_id: The query ID

    Returns:
        Dict with keys ``query_id``, ``profile_text``, ``profile_size``,
        ``retrieved_at`` and ``api_endpoint``, or None if no endpoint
        returned a profile
    """
    # Function-scope imports, matching the local-import style used in this file
    import json
    from urllib.parse import quote

    try:
        # Get database config
        db_config = self.connection_manager.config.database

        # Percent-encode the ID so it cannot alter the path or query string
        encoded_query_id = quote(str(query_id), safe="")

        # Try both API endpoints according to official documentation
        urls = [
            f"http://{db_config.host}:{db_config.fe_http_port}/rest/v2/manager/query/profile/text/{encoded_query_id}",
            f"http://{db_config.host}:{db_config.fe_http_port}/api/profile/text?query_id={encoded_query_id}"
        ]

        # HTTP Basic Auth
        auth = aiohttp.BasicAuth(db_config.user, db_config.password)
        request_timeout = aiohttp.ClientTimeout(total=60)

        # One session is shared by both attempts instead of one per URL
        async with aiohttp.ClientSession() as session:
            for i, url in enumerate(urls):
                logger.info(f"Requesting profile from URL {i+1}: {url}")
                try:
                    async with session.get(url, auth=auth, timeout=request_timeout) as response:
                        if response.status == 404:
                            logger.warning(f"Profile not found (404) at {url}")
                            continue  # Try next URL

                        if response.status != 200:
                            logger.error(f"Profile HTTP request failed with status {response.status} at {url}")
                            response_text = await response.text()
                            logger.error(f"Response body: {response_text}")
                            continue  # Try next URL

                        content_type = response.headers.get('content-type', '')
                        response_text = await response.text()
                        logger.info(f"Profile response content type: {content_type}")
                        logger.info(f"Profile response length: {len(response_text)}")

                        # Handle JSON response
                        if 'application/json' in content_type:
                            try:
                                # Reuse the body already read above rather
                                # than decoding the response a second time
                                result = json.loads(response_text)
                                # Only the status code is logged; the profile
                                # itself can be very large
                                logger.info(f"Profile JSON response code: {result.get('code')}")
                                if result.get("code") == 0 and result.get("data"):
                                    profile_text = result["data"].get("profile", "")
                                    return {
                                        "query_id": query_id,
                                        "profile_text": profile_text,
                                        "profile_size": len(profile_text),
                                        "retrieved_at": datetime.now().isoformat(),
                                        "api_endpoint": url
                                    }
                                logger.warning(f"Profile API returned error: code={result.get('code')}, msg={result.get('msg')}")
                                continue  # Try next URL
                            except Exception as e:
                                logger.error(f"Failed to parse profile JSON: {e}")
                                continue

                        # Handle plain text response
                        # NOTE(review): the "not found" substring check is a
                        # heuristic and would also reject a real profile that
                        # happens to contain that phrase.
                        if response_text.strip() and "not found" not in response_text.lower():
                            return {
                                "query_id": query_id,
                                "profile_text": response_text,
                                "profile_size": len(response_text),
                                "retrieved_at": datetime.now().isoformat(),
                                "api_endpoint": url
                            }
                        logger.warning(f"Profile not found or empty: {response_text}")
                        continue  # Try next URL
                except Exception as e:
                    # A transport error or timeout on one endpoint must not
                    # prevent the fallback endpoint from being tried
                    logger.error(f"Profile request failed at {url}: {str(e)}")
                    continue

        return None

    except Exception as e:
        logger.error(f"Failed to get profile by query ID: {str(e)}")
        return None
async def get_table_data_size(
    self,
    db_name: str = None,
    table_name: str = None,
    single_replica: bool = False
) -> Dict[str, Any]:
    """
    Get table data size information via FE HTTP API

    Calls the FE endpoint ``/api/show_table_data`` with HTTP Basic Auth and
    passes the raw ``data`` payload through ``_format_table_data_size``.

    Args:
        db_name: Database name, if not specified returns all databases
        table_name: Table name, if not specified returns all tables in the database
        single_replica: Whether to get single replica data size

    Returns:
        Dict containing table data size information. ``success`` is True when
        the API answered with ``code == 0`` (including an empty result);
        otherwise ``success`` is False and ``error`` describes the failure.
    """
    # Function-scope import, matching the local-import style used in this file
    import json

    try:
        # Get database config
        db_config = self.connection_manager.config.database

        # Build HTTP API URL according to official documentation
        # Reference: https://doris.apache.org/zh-CN/docs/admin-manual/open-api/fe-http/show-table-data-action
        url = f"http://{db_config.host}:{db_config.fe_http_port}/api/show_table_data"

        # Build query parameters; only filters that were supplied are sent
        params = {}
        if db_name:
            params["db"] = db_name
        if table_name:
            params["table"] = table_name
        if single_replica:
            params["single_replica"] = "true"

        # HTTP Basic Auth
        auth = aiohttp.BasicAuth(db_config.user, db_config.password)
        request_timeout = aiohttp.ClientTimeout(total=30)

        logger.info(f"Requesting table data size from: {url} with params: {params}")

        async with aiohttp.ClientSession() as session:
            async with session.get(url, auth=auth, params=params, timeout=request_timeout) as response:
                if response.status != 200:
                    logger.error(f"HTTP request failed with status {response.status}")
                    response_text = await response.text()
                    logger.error(f"Response body: {response_text}")
                    return {
                        "success": False,
                        "error": f"HTTP request failed with status {response.status}",
                        "response_text": response_text[:500],  # First 500 chars for debugging
                        "url": url,
                        "timestamp": datetime.now().isoformat()
                    }

                response_text = await response.text()
                logger.info(f"Table data size response length: {len(response_text)}")

                try:
                    # Parse JSON response
                    result = json.loads(response_text)
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to parse JSON response: {e}")
                    return {
                        "success": False,
                        "error": f"Failed to parse JSON response: {e}",
                        "response_text": response_text[:500],  # First 500 chars for debugging
                        "url": url,
                        "timestamp": datetime.now().isoformat()
                    }

                # Only a non-zero (or missing) code is an API error. An empty
                # "data" payload with code 0 is a valid empty result and must
                # not be reported as a failure.
                if not isinstance(result, dict) or result.get("code") != 0:
                    return {
                        "success": False,
                        "error": f"API returned error: {result}",
                        "db_name": db_name,
                        "table_name": table_name,
                        "url": url,
                        "timestamp": datetime.now().isoformat()
                    }

                # Process and format the data
                formatted_data = self._format_table_data_size(
                    result.get("data"), db_name, table_name, single_replica
                )

                return {
                    "success": True,
                    "db_name": db_name,
                    "table_name": table_name,
                    "single_replica": single_replica,
                    "timestamp": datetime.now().isoformat(),
                    "data": formatted_data,
                    "url": url,
                    "note": "Table data size information from Doris FE HTTP API"
                }

    except Exception as e:
        logger.error(f"Table data size request failed: {str(e)}")
        return {
            "success": False,
            "error": f"Table data size request failed: {str(e)}",
            "db_name": db_name,
            "table_name": table_name,
            "timestamp": datetime.now().isoformat()
        }
def _format_table_data_size(self, data: Dict[str, Any], db_name: str, table_name: str, single_replica: bool) -> Dict[str, Any]:
"""
Format table data size response data
Args:
data: Raw response data from API
db_name: Database name filter
table_name: Table name filter
single_replica: Single replica flag
Returns:
Formatted data structure
"""
try:
formatted = {
"summary": {
"total_databases": 0,
"total_tables": 0,
"total_size_bytes": 0,
"total_size_formatted": "0 B",
"single_replica": single_replica,
"query_filters": {
"db_name": db_name,
"table_name": table_name
}
},
"databases": {}
}
# Process the data based on its structure
if isinstance(data, list):
# Data is a list of table records
for record in data:
db = record.get("database", "unknown")
table = record.get("table", "unknown")
size_bytes = int(record.get("size", 0))
if db not in formatted["databases"]:
formatted["databases"][db] = {
"database_name": db,
"table_count": 0,
"total_size_bytes": 0,
"total_size_formatted": "0 B",
"tables": {}
}
formatted["databases"][db]["tables"][table] = {
"table_name": table,
"size_bytes": size_bytes,
"size_formatted": self._format_bytes(size_bytes),
"replica_count": record.get("replica_count", 1),
"details": record
}
formatted["databases"][db]["table_count"] += 1
formatted["databases"][db]["total_size_bytes"] += size_bytes
formatted["summary"]["total_size_bytes"] += size_bytes
elif isinstance(data, dict):
# Data is a dict with database structure
for db, db_info in data.items():
if isinstance(db_info, dict) and "tables" in db_info:
formatted["databases"][db] = {
"database_name": db,
"table_count": len(db_info["tables"]),
"total_size_bytes": 0,
"total_size_formatted": "0 B",
"tables": {}
}
for table, table_info in db_info["tables"].items():
size_bytes = int(table_info.get("size", 0))
formatted["databases"][db]["tables"][table] = {
"table_name": table,
"size_bytes": size_bytes,
"size_formatted": self._format_bytes(size_bytes),
"replica_count": table_info.get("replica_count", 1),
"details": table_info
}
formatted["databases"][db]["total_size_bytes"] += size_bytes
formatted["summary"]["total_size_bytes"] += size_bytes
# Update summary
formatted["summary"]["total_databases"] = len(formatted["databases"])
formatted["summary"]["total_tables"] = sum(db["table_count"] for db in formatted["databases"].values())
formatted["summary"]["total_size_formatted"] = self._format_bytes(formatted["summary"]["total_size_bytes"])
# Update database totals formatting
for db_info in formatted["databases"].values():
db_info["total_size_formatted"] = self._format_bytes(db_info["total_size_bytes"])
return formatted
except Exception as e:
logger.error(f"Failed to format table data size: {str(e)}")
return {
"error": f"Failed to format data: {str(e)}",
"raw_data": data
}
def _format_bytes(self, bytes_value: int) -> str:
"""
Format bytes value to human readable string
Args:
bytes_value: Bytes value
Returns:
Formatted string like "1.23 GB"
"""
try:
bytes_value = int(bytes_value)
if bytes_value == 0:
return "0 B"
units = ["B", "KB", "MB", "GB", "TB", "PB"]
unit_index = 0
size = float(bytes_value)
while size >= 1024 and unit_index < len(units) - 1:
size /= 1024
unit_index += 1
if unit_index == 0:
return f"{int(size)} {units[unit_index]}"
else:
return f"{size:.2f} {units[unit_index]}"
except (ValueError, TypeError):
return str(bytes_value)
class MemoryTracker:
    """Memory tracker for Doris BE memory monitoring"""

    def __init__(self, connection_manager: DorisConnectionManager):
        # Stored for later use; the placeholder methods below never read it
        self.connection_manager = connection_manager

    async def get_realtime_memory_stats(
        self,
        tracker_type: str = "overview",
        include_details: bool = True
    ) -> Dict[str, Any]:
        """
        Return real-time memory statistics (placeholder)

        The figures returned are fixed sample values, not live measurements;
        no BE endpoint is contacted.

        Args:
            tracker_type: Type of memory trackers to retrieve (echoed back)
            include_details: Whether to include detailed information (echoed back)

        Returns:
            Dict containing memory statistics, or an error dict with
            ``success`` set to False if building the response fails
        """
        try:
            # Placeholder data: a real implementation would query the
            # Doris BE memory tracker endpoints here
            sample_stats = {
                "total_memory": "8.00 GB",
                "used_memory": "4.50 GB",
                "free_memory": "3.50 GB",
                "memory_usage_percent": 56.25
            }
            response = {
                "success": True,
                "tracker_type": tracker_type,
                "include_details": include_details,
                "timestamp": datetime.now().isoformat(),
                "memory_stats": sample_stats,
                "note": "Memory tracker functionality requires BE HTTP endpoints to be available"
            }
            return response
        except Exception as exc:
            failure_message = f"Failed to get realtime memory stats: {str(exc)}"
            logger.error(failure_message)
            return {
                "success": False,
                "error": failure_message,
                "tracker_type": tracker_type,
                "timestamp": datetime.now().isoformat()
            }

    async def get_historical_memory_stats(
        self,
        tracker_names: List[str] = None,
        time_range: str = "1h"
    ) -> Dict[str, Any]:
        """
        Return historical memory statistics (placeholder)

        The figures returned are fixed sample values, not recorded history;
        no BE endpoint is contacted.

        Args:
            tracker_names: List of specific tracker names to query (echoed back)
            time_range: Time range for historical data (echoed back)

        Returns:
            Dict containing historical memory statistics, or an error dict
            with ``success`` set to False if building the response fails
        """
        try:
            # Placeholder data: a real implementation would fetch history
            # from the Doris BE bvar endpoints here
            sample_history = {
                "data_points": 60,
                "interval": "1m",
                "memory_trend": "stable",
                "avg_usage": "4.2 GB",
                "peak_usage": "5.1 GB",
                "min_usage": "3.8 GB"
            }
            response = {
                "success": True,
                "tracker_names": tracker_names,
                "time_range": time_range,
                "timestamp": datetime.now().isoformat(),
                "historical_stats": sample_history,
                "note": "Historical memory tracking functionality requires BE bvar endpoints to be available"
            }
            return response
        except Exception as exc:
            failure_message = f"Failed to get historical memory stats: {str(exc)}"
            logger.error(failure_message)
            return {
                "success": False,
                "error": failure_message,
                "tracker_names": tracker_names,
                "time_range": time_range,
                "timestamp": datetime.now().isoformat()
            }

View File

@@ -41,8 +41,16 @@ class DatabaseConfig:
port: int = 9030 port: int = 9030
user: str = "root" user: str = "root"
password: str = "" password: str = ""
database: str = "test" database: str = "information_schema"
charset: str = "utf8mb4" charset: str = "UTF8"
# FE HTTP API port for profile and other HTTP APIs
fe_http_port: int = 8030
# BE nodes configuration for external access
# If be_hosts is empty, will use "show backends" to get BE nodes
be_hosts: list[str] = field(default_factory=list)
be_webserver_port: int = 8040
# Connection pool configuration # Connection pool configuration
min_connections: int = 5 min_connections: int = 5
@@ -62,17 +70,26 @@ class SecurityConfig:
token_expiry: int = 3600 token_expiry: int = 3600
# SQL security configuration # SQL security configuration
enable_security_check: bool = True # Main switch: whether to enable SQL security check
blocked_keywords: list[str] = field( blocked_keywords: list[str] = field(
default_factory=lambda: [ default_factory=lambda: [
# DDL Operations (Data Definition Language)
"DROP", "DROP",
"DELETE",
"TRUNCATE",
"ALTER",
"CREATE", "CREATE",
"ALTER",
"TRUNCATE",
# DML Operations (Data Manipulation Language)
"DELETE",
"INSERT", "INSERT",
"UPDATE", "UPDATE",
# DCL Operations (Data Control Language)
"GRANT", "GRANT",
"REVOKE", "REVOKE",
# System Operations
"EXEC",
"EXECUTE",
"SHUTDOWN",
"KILL",
] ]
) )
max_query_complexity: int = 100 max_query_complexity: int = 100
@@ -103,6 +120,9 @@ class PerformanceConfig:
connection_pool_size: int = 20 connection_pool_size: int = 20
idle_timeout: int = 1800 idle_timeout: int = 1800
# Response content size limit (characters)
max_response_content_size: int = 4096
@dataclass @dataclass
class LoggingConfig: class LoggingConfig:
@@ -125,11 +145,11 @@ class MonitoringConfig:
# Metrics collection configuration # Metrics collection configuration
enable_metrics: bool = True enable_metrics: bool = True
metrics_port: int = 8081 metrics_port: int = 3001
metrics_path: str = "/metrics" metrics_path: str = "/metrics"
# Health check configuration # Health check configuration
health_check_port: int = 8082 health_check_port: int = 3002
health_check_path: str = "/health" health_check_path: str = "/health"
# Alert configuration # Alert configuration
@@ -143,8 +163,12 @@ class DorisConfig:
# Basic configuration # Basic configuration
server_name: str = "doris-mcp-server" server_name: str = "doris-mcp-server"
server_version: str = "1.0.0" server_version: str = "0.4.1"
server_port: int = 8080 server_port: int = 3000
transport: str = "stdio"
# Temporary files configuration
temp_files_dir: str = "tmp" # Temporary files directory for Explain and Profile outputs
# Sub-configuration modules # Sub-configuration modules
database: DatabaseConfig = field(default_factory=DatabaseConfig) database: DatabaseConfig = field(default_factory=DatabaseConfig)
@@ -215,6 +239,13 @@ class DorisConfig:
config.database.user = os.getenv("DORIS_USER", config.database.user) config.database.user = os.getenv("DORIS_USER", config.database.user)
config.database.password = os.getenv("DORIS_PASSWORD", config.database.password) config.database.password = os.getenv("DORIS_PASSWORD", config.database.password)
config.database.database = os.getenv("DORIS_DATABASE", config.database.database) config.database.database = os.getenv("DORIS_DATABASE", config.database.database)
config.database.fe_http_port = int(os.getenv("DORIS_FE_HTTP_PORT", str(config.database.fe_http_port)))
# BE nodes configuration
be_hosts_env = os.getenv("DORIS_BE_HOSTS", "")
if be_hosts_env:
config.database.be_hosts = [host.strip() for host in be_hosts_env.split(",") if host.strip()]
config.database.be_webserver_port = int(os.getenv("DORIS_BE_WEBSERVER_PORT", str(config.database.be_webserver_port)))
# Connection pool configuration # Connection pool configuration
config.database.min_connections = int( config.database.min_connections = int(
@@ -245,6 +276,22 @@ class DorisConfig:
config.security.max_query_complexity = int( config.security.max_query_complexity = int(
os.getenv("MAX_QUERY_COMPLEXITY", str(config.security.max_query_complexity)) os.getenv("MAX_QUERY_COMPLEXITY", str(config.security.max_query_complexity))
) )
config.security.enable_security_check = (
os.getenv("ENABLE_SECURITY_CHECK", str(config.security.enable_security_check).lower()).lower() == "true"
)
# Handle blocked keywords environment variable configuration
# Format: BLOCKED_KEYWORDS="DROP,DELETE,TRUNCATE,ALTER,CREATE,INSERT,UPDATE,GRANT,REVOKE"
blocked_keywords_env = os.getenv("BLOCKED_KEYWORDS", "")
if blocked_keywords_env:
# If environment variable is provided, use keywords list from environment variable
config.security.blocked_keywords = [
keyword.strip().upper()
for keyword in blocked_keywords_env.split(",")
if keyword.strip()
]
# If environment variable is empty, keep default configuration unchanged
config.security.enable_masking = ( config.security.enable_masking = (
os.getenv("ENABLE_MASKING", str(config.security.enable_masking).lower()).lower() == "true" os.getenv("ENABLE_MASKING", str(config.security.enable_masking).lower()).lower() == "true"
) )
@@ -265,6 +312,9 @@ class DorisConfig:
config.performance.query_timeout = int( config.performance.query_timeout = int(
os.getenv("QUERY_TIMEOUT", str(config.performance.query_timeout)) os.getenv("QUERY_TIMEOUT", str(config.performance.query_timeout))
) )
config.performance.max_response_content_size = int(
os.getenv("MAX_RESPONSE_CONTENT_SIZE", str(config.performance.max_response_content_size))
)
# Logging configuration # Logging configuration
config.logging.level = os.getenv("LOG_LEVEL", config.logging.level) config.logging.level = os.getenv("LOG_LEVEL", config.logging.level)
@@ -293,6 +343,7 @@ class DorisConfig:
config.server_name = os.getenv("SERVER_NAME", config.server_name) config.server_name = os.getenv("SERVER_NAME", config.server_name)
config.server_version = os.getenv("SERVER_VERSION", config.server_version) config.server_version = os.getenv("SERVER_VERSION", config.server_version)
config.server_port = int(os.getenv("SERVER_PORT", str(config.server_port))) config.server_port = int(os.getenv("SERVER_PORT", str(config.server_port)))
config.temp_files_dir = os.getenv("TEMP_FILES_DIR", config.temp_files_dir)
return config return config
@@ -302,7 +353,7 @@ class DorisConfig:
config = cls() config = cls()
# Update basic configuration # Update basic configuration
for key in ["server_name", "server_version", "server_port"]: for key in ["server_name", "server_version", "server_port", "temp_files_dir"]:
if key in config_data: if key in config_data:
setattr(config, key, config_data[key]) setattr(config, key, config_data[key])
@@ -352,6 +403,7 @@ class DorisConfig:
"server_name": self.server_name, "server_name": self.server_name,
"server_version": self.server_version, "server_version": self.server_version,
"server_port": self.server_port, "server_port": self.server_port,
"temp_files_dir": self.temp_files_dir,
"database": { "database": {
"host": self.database.host, "host": self.database.host,
"port": self.database.port, "port": self.database.port,
@@ -359,6 +411,9 @@ class DorisConfig:
"password": "***", # Hide password "password": "***", # Hide password
"database": self.database.database, "database": self.database.database,
"charset": self.database.charset, "charset": self.database.charset,
"fe_http_port": self.database.fe_http_port,
"be_hosts": self.database.be_hosts,
"be_webserver_port": self.database.be_webserver_port,
"min_connections": self.database.min_connections, "min_connections": self.database.min_connections,
"max_connections": self.database.max_connections, "max_connections": self.database.max_connections,
"connection_timeout": self.database.connection_timeout, "connection_timeout": self.database.connection_timeout,
@@ -369,6 +424,7 @@ class DorisConfig:
"auth_type": self.security.auth_type, "auth_type": self.security.auth_type,
"token_secret": "***", # Hide secret key "token_secret": "***", # Hide secret key
"token_expiry": self.security.token_expiry, "token_expiry": self.security.token_expiry,
"enable_security_check": self.security.enable_security_check,
"blocked_keywords": self.security.blocked_keywords, "blocked_keywords": self.security.blocked_keywords,
"max_query_complexity": self.security.max_query_complexity, "max_query_complexity": self.security.max_query_complexity,
"max_result_rows": self.security.max_result_rows, "max_result_rows": self.security.max_result_rows,
@@ -384,6 +440,7 @@ class DorisConfig:
"query_timeout": self.performance.query_timeout, "query_timeout": self.performance.query_timeout,
"connection_pool_size": self.performance.connection_pool_size, "connection_pool_size": self.performance.connection_pool_size,
"idle_timeout": self.performance.idle_timeout, "idle_timeout": self.performance.idle_timeout,
"max_response_content_size": self.performance.max_response_content_size,
}, },
"logging": { "logging": {
"level": self.logging.level, "level": self.logging.level,

View File

@@ -137,10 +137,29 @@ class DorisConnection:
async def ping(self) -> bool: async def ping(self) -> bool:
"""Check connection health status""" """Check connection health status"""
try: try:
# Check if connection exists and is not closed
if not self.connection or self.connection.closed:
self.is_healthy = False
return False
# Check if connection has _reader (aiomysql internal state)
# This prevents the 'NoneType' object has no attribute 'at_eof' error
if not hasattr(self.connection, '_reader') or self.connection._reader is None:
self.is_healthy = False
return False
# Additional check for reader's state
if hasattr(self.connection._reader, '_transport') and self.connection._reader._transport is None:
self.is_healthy = False
return False
# Try to ping the connection
await self.connection.ping() await self.connection.ping()
self.is_healthy = True self.is_healthy = True
return True return True
except Exception: except (AttributeError, OSError, ConnectionError, Exception) as e:
# Log the specific error for debugging
logging.debug(f"Connection ping failed for session {self.session_id}: {e}")
self.is_healthy = False self.is_healthy = False
return False return False
@@ -181,7 +200,17 @@ class DorisConnectionManager:
async def initialize(self): async def initialize(self):
"""Initialize connection manager""" """Initialize connection manager"""
try: try:
# Create connection pool self.logger.info(f"Initializing connection pool to {self.config.database.host}:{self.config.database.port}")
# Validate configuration
if not self.config.database.host:
raise ValueError("Database host is required")
if not self.config.database.user:
raise ValueError("Database user is required")
if not self.config.database.password:
self.logger.warning("Database password is empty, this may cause connection issues")
# Create connection pool with additional parameters for stability
self.pool = await aiomysql.create_pool( self.pool = await aiomysql.create_pool(
host=self.config.database.host, host=self.config.database.host,
port=self.config.database.port, port=self.config.database.port,
@@ -193,8 +222,15 @@ class DorisConnectionManager:
maxsize=self.config.database.max_connections or 20, maxsize=self.config.database.max_connections or 20,
autocommit=True, autocommit=True,
connect_timeout=self.connection_timeout, connect_timeout=self.connection_timeout,
# Additional parameters for stability
pool_recycle=3600, # Recycle connections every hour
echo=False, # Don't echo SQL statements
) )
# Test the connection pool
if not await self.test_connection():
raise RuntimeError("Connection pool test failed")
self.logger.info( self.logger.info(
f"Connection pool initialized successfully, min connections: {self.config.database.min_connections}, " f"Connection pool initialized successfully, min connections: {self.config.database.min_connections}, "
f"max connections: {self.config.database.max_connections}" f"max connections: {self.config.database.max_connections}"
@@ -206,6 +242,14 @@ class DorisConnectionManager:
except Exception as e: except Exception as e:
self.logger.error(f"Connection pool initialization failed: {e}") self.logger.error(f"Connection pool initialization failed: {e}")
# Clean up partial initialization
if self.pool:
try:
self.pool.close()
await self.pool.wait_closed()
except Exception:
pass
self.pool = None
raise raise
async def get_connection(self, session_id: str) -> DorisConnection: async def get_connection(self, session_id: str) -> DorisConnection:
@@ -235,9 +279,24 @@ class DorisConnectionManager:
# Get connection from pool # Get connection from pool
raw_connection = await self.pool.acquire() raw_connection = await self.pool.acquire()
# Validate the raw connection
if not raw_connection:
raise RuntimeError(f"Failed to acquire connection from pool for session {session_id}")
# Verify the connection is not closed
if raw_connection.closed:
raise RuntimeError(f"Acquired connection is already closed for session {session_id}")
# Create wrapped connection # Create wrapped connection
doris_conn = DorisConnection(raw_connection, session_id, self.security_manager) doris_conn = DorisConnection(raw_connection, session_id, self.security_manager)
# Test the connection before storing it
if not await doris_conn.ping():
# If ping fails, release the connection and raise error
if self.pool and raw_connection and not raw_connection.closed:
self.pool.release(raw_connection)
raise RuntimeError(f"New connection failed ping test for session {session_id}")
# Store in session connections # Store in session connections
self.session_connections[session_id] = doris_conn self.session_connections[session_id] = doris_conn
@@ -261,15 +320,34 @@ class DorisConnectionManager:
if session_id in self.session_connections: if session_id in self.session_connections:
conn = self.session_connections[session_id] conn = self.session_connections[session_id]
try: try:
# Return connection to pool # Return connection to pool only if it's valid and not closed
if self.pool and conn.connection and not conn.connection.closed: if (self.pool and
conn.connection and
not conn.connection.closed and
hasattr(conn.connection, '_reader') and
conn.connection._reader is not None):
try:
# Try to gracefully return to pool
self.pool.release(conn.connection) self.pool.release(conn.connection)
except Exception as pool_error:
self.logger.debug(f"Failed to return connection to pool for session {session_id}: {pool_error}")
# If pool release fails, try to close the connection directly
try:
await conn.connection.ensure_closed()
except Exception:
pass # Ignore errors during forced close
# Close connection wrapper # Close connection wrapper
await conn.close() await conn.close()
except Exception as e: except Exception as e:
self.logger.error(f"Error cleaning up connection for session {session_id}: {e}") self.logger.error(f"Error cleaning up connection for session {session_id}: {e}")
# Force close if normal cleanup fails
try:
if conn.connection and not conn.connection.closed:
await conn.connection.ensure_closed()
except Exception:
pass # Ignore errors during forced close
finally: finally:
# Remove from session connections # Remove from session connections
del self.session_connections[session_id] del self.session_connections[session_id]
@@ -291,12 +369,26 @@ class DorisConnectionManager:
try: try:
unhealthy_sessions = [] unhealthy_sessions = []
# First pass: check basic connectivity
for session_id, conn in self.session_connections.items(): for session_id, conn in self.session_connections.items():
if not await conn.ping(): if not await conn.ping():
unhealthy_sessions.append(session_id) unhealthy_sessions.append(session_id)
# Clean up unhealthy connections # Second pass: check for stale connections (over 30 minutes old)
for session_id in unhealthy_sessions: current_time = datetime.utcnow()
stale_sessions = []
for session_id, conn in self.session_connections.items():
if session_id not in unhealthy_sessions: # Don't double-check
last_used_delta = (current_time - conn.last_used).total_seconds()
if last_used_delta > 1800: # 30 minutes
# Force a ping check for stale connections
if not await conn.ping():
stale_sessions.append(session_id)
all_problematic_sessions = list(set(unhealthy_sessions + stale_sessions))
# Clean up problematic connections
for session_id in all_problematic_sessions:
await self._cleanup_session_connection(session_id) await self._cleanup_session_connection(session_id)
self.metrics.failed_connections += 1 self.metrics.failed_connections += 1
@@ -304,11 +396,19 @@ class DorisConnectionManager:
await self._update_connection_metrics() await self._update_connection_metrics()
self.metrics.last_health_check = datetime.utcnow() self.metrics.last_health_check = datetime.utcnow()
if unhealthy_sessions: if all_problematic_sessions:
self.logger.warning(f"Cleaned up {len(unhealthy_sessions)} unhealthy connections") self.logger.warning(f"Health check: cleaned up {len(unhealthy_sessions)} unhealthy and {len(stale_sessions)} stale connections")
else:
self.logger.debug(f"Health check: all {len(self.session_connections)} connections healthy")
except Exception as e: except Exception as e:
self.logger.error(f"Health check failed: {e}") self.logger.error(f"Health check failed: {e}")
# If health check fails, try to diagnose the issue
try:
diagnosis = await self.diagnose_connection_health()
self.logger.error(f"Connection diagnosis: {diagnosis}")
except Exception:
pass # Don't let diagnosis failure crash health check
async def _cleanup_loop(self): async def _cleanup_loop(self):
"""Background cleanup loop""" """Background cleanup loop"""
@@ -415,6 +515,93 @@ class DorisConnectionManager:
self.logger.error(f"Connection test failed: {e}") self.logger.error(f"Connection test failed: {e}")
return False return False
async def diagnose_connection_health(self) -> Dict[str, Any]:
    """Diagnose connection pool and session health.

    Builds a report describing the pool and every entry in
    ``self.session_connections``, then cleans up each session that was
    flagged as problematic via ``self._cleanup_session_connection``.

    Returns:
        A dict with the keys ``timestamp``, ``pool_status``,
        ``session_connections``, ``problematic_connections`` and
        ``recommendations``; ``pool_info`` is added when the pool is open,
        and ``error`` is added if the diagnosis itself raised.

    This method does not raise: any unexpected exception is recorded in the
    returned report instead.
    """
    diagnosis = {
        "timestamp": datetime.utcnow().isoformat(),
        "pool_status": "unknown",
        "session_connections": {},
        "problematic_connections": [],
        "recommendations": []
    }

    try:
        # Check pool status; without a usable pool there is nothing else to inspect
        if not self.pool:
            diagnosis["pool_status"] = "not_initialized"
            diagnosis["recommendations"].append("Initialize connection pool")
            return diagnosis

        if self.pool.closed:
            diagnosis["pool_status"] = "closed"
            diagnosis["recommendations"].append("Recreate connection pool")
            return diagnosis

        diagnosis["pool_status"] = "healthy"
        diagnosis["pool_info"] = {
            "size": self.pool.size,
            "free_size": self.pool.freesize,
            "min_size": self.pool.minsize,
            "max_size": self.pool.maxsize
        }

        # Check session connections
        problematic_sessions = []
        for session_id, conn in self.session_connections.items():
            conn_status = {
                "session_id": session_id,
                "created_at": conn.created_at.isoformat(),
                "last_used": conn.last_used.isoformat(),
                "query_count": conn.query_count,
                "is_healthy": conn.is_healthy
            }

            # Detailed connection checks. `_reader` and `_transport` are private
            # attributes of the underlying connection, so read them defensively:
            # a missing attribute must mark this one connection as problematic,
            # not abort the diagnosis of every other session.
            if conn.connection:
                reader = getattr(conn.connection, '_reader', None)
                conn_status["connection_closed"] = conn.connection.closed
                conn_status["has_reader"] = reader is not None
                if reader:
                    conn_status["reader_transport"] = getattr(reader, '_transport', None) is not None
                else:
                    conn_status["reader_transport"] = False
            else:
                conn_status["connection_closed"] = True
                conn_status["has_reader"] = False
                conn_status["reader_transport"] = False

            # Check if connection is problematic
            if (not conn.is_healthy or
                    conn_status["connection_closed"] or
                    not conn_status["has_reader"] or
                    not conn_status["reader_transport"]):
                problematic_sessions.append(session_id)
                diagnosis["problematic_connections"].append(conn_status)

            diagnosis["session_connections"][session_id] = conn_status

        # Generate recommendations
        if problematic_sessions:
            diagnosis["recommendations"].append(f"Clean up {len(problematic_sessions)} problematic connections")

        if self.pool.freesize == 0 and self.pool.size >= self.pool.maxsize:
            diagnosis["recommendations"].append("Connection pool exhausted - consider increasing max_connections")

        # Auto-cleanup problematic connections. This runs after the loop above,
        # so the session dict is not modified while it is being iterated.
        for session_id in problematic_sessions:
            try:
                await self._cleanup_session_connection(session_id)
                self.logger.info(f"Auto-cleaned problematic connection for session: {session_id}")
            except Exception as e:
                self.logger.error(f"Failed to auto-clean session {session_id}: {e}")

        return diagnosis

    except Exception as e:
        # Report the failure in-band; callers log the returned dict
        diagnosis["error"] = str(e)
        diagnosis["recommendations"].append("Manual intervention required")
        return diagnosis
class ConnectionPoolMonitor: class ConnectionPoolMonitor:
"""Connection pool monitor """Connection pool monitor

File diff suppressed because it is too large Load Diff

View File

@@ -548,6 +548,10 @@ class DorisQueryExecutor:
user_id: str = "mcp_user" user_id: str = "mcp_user"
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Execute SQL query for MCP interface - unified method""" """Execute SQL query for MCP interface - unified method"""
max_retries = 2
retry_count = 0
while retry_count <= max_retries:
try: try:
if not sql: if not sql:
return { return {
@@ -579,46 +583,90 @@ class DorisQueryExecutor:
session_id=session_id, session_id=session_id,
user_id=user_id, user_id=user_id,
timeout=timeout, timeout=timeout,
cache_enabled=True cache_enabled=False # Disable cache for MCP calls to ensure fresh data
) )
# Execute query # Execute query with retry logic
try:
result = await self.execute_query(query_request, auth_context) result = await self.execute_query(query_request, auth_context)
# Process results # Serialize data for JSON response
processed_data = [] serialized_data = []
if result.data:
for row in result.data: for row in result.data:
processed_row = self._serialize_row_data(row) serialized_data.append(self._serialize_row_data(row))
processed_data.append(processed_row)
return { return {
"success": True, "success": True,
"data": processed_data, "data": serialized_data,
"metadata": {
"row_count": result.row_count, "row_count": result.row_count,
"execution_time": result.execution_time, "execution_time": result.execution_time,
"metadata": {
"columns": result.metadata.get("columns", []), "columns": result.metadata.get("columns", []),
"query": sql "query": sql
},
"error": None
} }
}
except Exception as query_error:
# Check if it's a connection-related error that we should retry
error_str = str(query_error).lower()
connection_errors = [
"at_eof", "connection", "closed", "nonetype",
"transport", "reader", "broken pipe", "connection reset"
]
is_connection_error = any(err in error_str for err in connection_errors)
if is_connection_error and retry_count < max_retries:
retry_count += 1
self.logger.warning(f"Connection error detected, retrying ({retry_count}/{max_retries}): {query_error}")
# Release the problematic connection
try:
await self.connection_manager.release_connection(session_id)
except Exception:
pass # Ignore cleanup errors
# Wait a bit before retry
await asyncio.sleep(0.5 * retry_count)
continue
else:
# Re-raise if not a connection error or max retries exceeded
raise query_error
except Exception as e: except Exception as e:
error_msg = str(e) error_msg = str(e)
self.logger.error(f"SQL execution error: {error_msg}")
# Analyze error for better user feedback # If we've exhausted retries or it's not a connection error, return error
if retry_count >= max_retries or "at_eof" not in error_msg.lower():
error_analysis = self._analyze_error(error_msg) error_analysis = self._analyze_error(error_msg)
return { return {
"success": False, "success": False,
"error": error_analysis.get("user_message", error_msg), "error": error_analysis.get("user_message", error_msg),
"error_type": error_analysis.get("error_type", "execution_error"), "error_type": error_analysis.get("error_type", "general_error"),
"data": None, "data": None,
"metadata": { "metadata": {
"query": sql, "query": sql,
"error_details": error_msg "error_details": error_msg,
"retry_count": retry_count
}
}
else:
# Try one more time for connection errors
retry_count += 1
if retry_count <= max_retries:
self.logger.warning(f"Retrying query due to connection error ({retry_count}/{max_retries}): {e}")
await asyncio.sleep(0.5 * retry_count)
continue
else:
return {
"success": False,
"error": f"Query failed after {max_retries} retries: {error_msg}",
"data": None,
"metadata": {
"query": sql,
"error_details": error_msg,
"retry_count": retry_count
} }
} }
@@ -649,7 +697,12 @@ class DorisQueryExecutor:
"""Analyze error message and provide user-friendly feedback""" """Analyze error message and provide user-friendly feedback"""
error_msg_lower = error_message.lower() error_msg_lower = error_message.lower()
if "table" in error_msg_lower and "doesn't exist" in error_msg_lower: if "at_eof" in error_msg_lower or "nonetype" in error_msg_lower and "at_eof" in error_msg_lower:
return {
"error_type": "connection_lost",
"user_message": "Database connection was lost. The query has been automatically retried. If this persists, please restart the server."
}
elif "table" in error_msg_lower and "doesn't exist" in error_msg_lower:
return { return {
"error_type": "table_not_found", "error_type": "table_not_found",
"user_message": "The specified table does not exist. Please check the table name and database." "user_message": "The specified table does not exist. Please check the table name and database."
@@ -674,6 +727,11 @@ class DorisQueryExecutor:
"error_type": "timeout", "error_type": "timeout",
"user_message": "Query execution timed out. Try simplifying your query or adding more specific filters." "user_message": "Query execution timed out. Try simplifying your query or adding more specific filters."
} }
elif "connection" in error_msg_lower and ("closed" in error_msg_lower or "reset" in error_msg_lower):
return {
"error_type": "connection_error",
"user_message": "Database connection was interrupted. The query has been automatically retried."
}
else: else:
return { return {
"error_type": "general_error", "error_type": "general_error",

View File

@@ -20,7 +20,6 @@ Doris Security Management Module
Implements enterprise-level authentication, authorization, SQL security validation and data masking functionality Implements enterprise-level authentication, authorization, SQL security validation and data masking functionality
""" """
import hashlib
import logging import logging
import re import re
from dataclasses import dataclass from dataclasses import dataclass
@@ -101,30 +100,24 @@ class DorisSecurityManager:
self.masking_rules = self._load_masking_rules() self.masking_rules = self._load_masking_rules()
def _load_blocked_keywords(self) -> set[str]: def _load_blocked_keywords(self) -> set[str]:
"""Load blocked SQL keywords""" """Load blocked SQL keywords from configuration"""
default_blocked = { # Load keywords from configuration, unified source of truth
"DROP",
"DELETE",
"TRUNCATE",
"ALTER",
"CREATE",
"INSERT",
"UPDATE",
"GRANT",
"REVOKE",
"EXEC",
"EXECUTE",
"SHUTDOWN",
"KILL",
}
# Load custom rules from configuration file
if hasattr(self.config, 'get'): if hasattr(self.config, 'get'):
custom_blocked = set(self.config.get("blocked_keywords", [])) # Dictionary-style configuration
blocked_keywords = self.config.get("blocked_keywords", [])
elif hasattr(self.config, 'security') and hasattr(self.config.security, 'blocked_keywords'):
# DorisConfig object, get through security.blocked_keywords
blocked_keywords = self.config.security.blocked_keywords
else: else:
custom_blocked = set() # Fallback to default if no configuration available
blocked_keywords = [
"DROP", "CREATE", "ALTER", "TRUNCATE",
"DELETE", "INSERT", "UPDATE",
"GRANT", "REVOKE",
"EXEC", "EXECUTE", "SHUTDOWN", "KILL"
]
return default_blocked.union(custom_blocked) return set(blocked_keywords)
def _load_sensitive_tables(self) -> dict[str, SecurityLevel]: def _load_sensitive_tables(self) -> dict[str, SecurityLevel]:
"""Load sensitive table configuration""" """Load sensitive table configuration"""
@@ -478,13 +471,30 @@ class SQLSecurityValidator:
# Dictionary configuration # Dictionary configuration
self.blocked_keywords = set(config.get("blocked_keywords", [])) self.blocked_keywords = set(config.get("blocked_keywords", []))
self.max_query_complexity = config.get("max_query_complexity", 100) self.max_query_complexity = config.get("max_query_complexity", 100)
self.enable_security_check = config.get("enable_security_check", True)
elif hasattr(config, 'security'):
# DorisConfig object with security attribute - unified source from config
self.blocked_keywords = set(config.security.blocked_keywords)
self.max_query_complexity = config.security.max_query_complexity
self.enable_security_check = getattr(config.security, 'enable_security_check', True)
else: else:
# DorisConfig object, use default values # Fallback to default if no configuration available
self.blocked_keywords = set(["DROP", "DELETE", "TRUNCATE", "ALTER", "CREATE", "INSERT", "UPDATE"]) self.blocked_keywords = set([
"DROP", "CREATE", "ALTER", "TRUNCATE",
"DELETE", "INSERT", "UPDATE",
"GRANT", "REVOKE",
"EXEC", "EXECUTE", "SHUTDOWN", "KILL"
])
self.max_query_complexity = 100 self.max_query_complexity = 100
self.enable_security_check = True
async def validate(self, sql: str, auth_context: AuthContext) -> ValidationResult: async def validate(self, sql: str, auth_context: AuthContext) -> ValidationResult:
"""Validate SQL query security""" """Validate SQL query security"""
# If security check is disabled, always return valid
if not self.enable_security_check:
self.logger.debug("SQL security check is disabled, allowing all queries")
return ValidationResult(is_valid=True)
try: try:
# Parse SQL statement # Parse SQL statement
parsed = sqlparse.parse(sql)[0] parsed = sqlparse.parse(sql)[0]

153
examples/dify/dify_demo.md Normal file
View File

@@ -0,0 +1,153 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# Dify Example: Integrating Doris MCP Server
This document demonstrates how to integrate and use `doris-mcp-server` in Dify to perform Doris SQL calls via MCP.
## Table of Contents
- [Prerequisites](#prerequisites)
- [Starting the MCP Server](#starting-the-mcp-server)
- [Ngrok Tunnel (Optional)](#ngrok-tunnel-optional)
- [Installing & Configuring the Plugin in Dify](#installing--configuring-the-plugin-in-dify)
- [Creating a Dify App](#creating-a-dify-app)
- [Instructions & Tool Configuration](#instructions--tool-configuration)
  - [Adding MCP Tools](#adding-mcp-tools)
- [Example Calls](#example-calls)
-----
## Prerequisites
First, install `mcp-doris-server`:
```bash
pip install mcp-doris-server
```
## Starting the MCP Server
Run the startup script:
```bash
# Full configuration with database connection
doris-mcp-server \
--transport http \
--host 0.0.0.0 \
--port 3000 \
--db-host 127.0.0.1 \
--db-port 9030 \
--db-user root \
--db-password your_password
```
If successful, you'll see logs similar to this:
![Server start logs](../images/dify_start_server.png)
-----
## Ngrok Tunnel (Optional)
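If your Dify deployment cannot reach the MCP server directly and needs a publicly accessible endpoint, you can use **ngrok**, a third-party service that securely exposes local servers to the internet. For example, `ngrok http 3000` creates a public URL that forwards to the server started above on port 3000.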
-----
## Installing & Configuring the Plugin in Dify
1. In the Dify console, go to **Plugin Marketplace**, then search for and install **MCPSSE / StreamableHTTP**:
![Install plugin](../images/dify_install_plugin.png)
2. After installation, click **Configure** and set the URL to your public or local address. For example, if you're using `ngrok`, this should be the public URL `ngrok` provides, in the format `https://<your-domain>/mcp`. If Dify can directly access your local server, use `http://localhost:3000/mcp`.
```json
{
"doris_mcp_server": {
"transport": "streamable_http",
"url": "https://<your-domain>/mcp"
}
}
```
![Configure plugin](../images/dify_config_mcp.png)
3. Click **Save**. If configured correctly, you'll see a green **Authorized** indicator:
![Authorized](../images/dify_authorized.png)
-----
## Creating a Dify App
1. In the Dify console, click **New App** → **Blank App**.
![Create app](../images/dify_create_app.png)
2. Select **Agent** as the template and set the **App Name** (e.g., `Doris ChatBI`).
![Agent setup](../images/dify_agent_setup.png)
-----
## Instructions & Tool Configuration
### Instruction Block
Paste the following into the **Instruction** field:
```
<instruction>
Use MCP tools to complete tasks as much as possible. Carefully read the annotations, method names, and parameter descriptions of each tool. Please follow these steps:
1. Analyze the user's question and match the most appropriate tool.
2. Use tool names and parameters exactly as defined; do not invent new ones.
3. Pass parameters in the required JSON format.
4. When calling tools, use:
{"mcp_sse_call_tool": {"tool_name": "<tool_name>", "arguments": "{}"}}
5. Output plain text only—no XML tags.
<input>
User question: user_query
</input>
<output>
Return tool results or a final answer, including analysis.
</output>
</instruction>
```
### Adding MCP Tools
In the **Tools** pane, click **Add** twice to add two entries, both named `mcp_sse` (they will inherit the transport and URL from the plugin):
![Add tools](../images/dify_add_tools.png)
-----
## Example Calls
### List Tables in Database
* **User**: What tables are in the database?
* **Result**: Dify will call the MCP tool to run `SHOW TABLES` and return the list.
![Query tables](../images/dify_query_tabels.png)
### Sales Trend Over Ten Years
* **User**: What has been the sales trend over the past ten years in the ssb database, and which year had the fastest growth?
* **Result**: The tool will execute the SQL, calculate growth rates, and return data.
![Sales trend](../images/dify_sale_trend.png)

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 258 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 127 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 317 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 369 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 272 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

View File

@@ -20,7 +20,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "doris-mcp-server" name = "doris-mcp-server"
version = "0.3.0" version = "0.4.2"
description = "Enterprise-grade Model Context Protocol (MCP) server implementation for Apache Doris" description = "Enterprise-grade Model Context Protocol (MCP) server implementation for Apache Doris"
authors = [ authors = [
{name = "Yijia Su", email = "freeoneplus@apache.org"} {name = "Yijia Su", email = "freeoneplus@apache.org"}
@@ -42,7 +42,7 @@ classifiers = [
dependencies = [ dependencies = [
# Core MCP dependencies # Core MCP dependencies
"mcp>=1.0.0", "mcp>=1.8.0,<2.0.0",
# Database drivers # Database drivers
"aiomysql>=0.2.0", "aiomysql>=0.2.0",
"PyMySQL>=1.1.0", "PyMySQL>=1.1.0",

View File

@@ -1,21 +1,5 @@
# Licensed to the Apache Software Foundation (ASF) under one # Development dependencies - auto-generated from pyproject.toml
# or more contributor license agreements. See the NOTICE file # Installation command: pip install -r requirements-dev.txt
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# 开发依赖 - 从 pyproject.toml 自动生成
# 安装命令: pip install -r requirements-dev.txt
pytest>=7.4.0 pytest>=7.4.0
pytest-asyncio>=0.23.0 pytest-asyncio>=0.23.0

View File

@@ -1,24 +1,8 @@
# Licensed to the Apache Software Foundation (ASF) under one # Main dependencies - auto-generated from pyproject.toml
# or more contributor license agreements. See the NOTICE file # Do not edit this file manually, use 'python generate_requirements.py' to regenerate
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# 主要依赖 - 从 pyproject.toml 自动生成
# 请不要手动编辑此文件,使用 python generate_requirements.py 重新生成
# === 核心依赖 === # === Core Dependencies ===
mcp>=1.0.0 mcp>=1.8.0,<2.0.0
aiomysql>=0.2.0 aiomysql>=0.2.0
PyMySQL>=1.1.0 PyMySQL>=1.1.0
asyncio-mqtt>=0.16.0 asyncio-mqtt>=0.16.0
@@ -53,8 +37,11 @@ click>=8.1.0
typer>=0.9.0 typer>=0.9.0
requests>=2.31.0 requests>=2.31.0
tqdm>=4.66.0 tqdm>=4.66.0
pytest>=8.4.0
pytest-asyncio>=1.0.0
pytest-cov>=6.1.1
# === 开发依赖 === # === Development Dependencies ===
pytest>=7.4.0 pytest>=7.4.0
pytest-asyncio>=0.23.0 pytest-asyncio>=0.23.0
pytest-cov>=4.1.0 pytest-cov>=4.1.0

View File

@@ -47,22 +47,30 @@ def event_loop():
@pytest.fixture @pytest.fixture
def test_config(): def test_config():
"""Provide test configuration""" """Test configuration fixture"""
return { from doris_mcp_server.utils.config import DorisConfig, DatabaseConfig, SecurityConfig
"doris_host": "localhost",
"doris_port": 9030, config = DorisConfig()
"doris_user": "test_user",
"doris_password": "test_password", # Database configuration
"doris_database": "test_db", config.database.host = "localhost"
"blocked_keywords": ["DROP", "DELETE", "TRUNCATE", "ALTER", "CREATE", "INSERT", "UPDATE"], config.database.port = 9030
"sensitive_tables": { config.database.user = "test_user"
"user_info": "confidential", config.database.password = "test_password"
"payment_records": "secret", config.database.database = "test_db"
"employee_data": "confidential", config.database.health_check_interval = 60
"public_reports": "public" config.database.min_connections = 5
}, config.database.max_connections = 20
"max_query_complexity": 100 config.database.connection_timeout = 30
} config.database.max_connection_age = 3600
# Security configuration
config.security.enable_masking = True
config.security.auth_type = "token"
config.security.token_secret = "test_secret"
config.security.token_expiry = 3600
return config
@pytest.fixture @pytest.fixture

View File

@@ -37,14 +37,6 @@ class TestEndToEndIntegration:
from doris_mcp_server.utils.config import DatabaseConfig, SecurityConfig from doris_mcp_server.utils.config import DatabaseConfig, SecurityConfig
config = Mock(spec=DorisConfig) config = Mock(spec=DorisConfig)
config.doris_host = "localhost"
config.doris_port = 9030
config.doris_user = "test_user"
config.doris_password = "test_password"
config.doris_database = "test_db"
config.server_host = "localhost"
config.server_port = 8000
config.enable_security = True
# Add database config # Add database config
config.database = Mock(spec=DatabaseConfig) config.database = Mock(spec=DatabaseConfig)
@@ -277,10 +269,7 @@ class TestEndToEndIntegration:
] ]
# Test performance stats tool # Test performance stats tool
result = await doris_server.tools_manager.call_tool("performance_stats", { result = await doris_server.tools_manager.call_tool("get_db_list", {})
"metric_type": "queries",
"time_range": "1h"
})
result_data = json.loads(result) result_data = json.loads(result)
# Accept either success result or error (due to mock environment) # Accept either success result or error (due to mock environment)

View File

@@ -51,10 +51,15 @@
"get_table_comment", "get_table_comment",
"get_table_column_comments", "get_table_column_comments",
"get_table_indexes", "get_table_indexes",
"column_analysis",
"performance_stats",
"get_recent_audit_logs", "get_recent_audit_logs",
"get_catalog_list" "get_catalog_list",
"get_sql_explain",
"get_sql_profile",
"get_table_data_size",
"get_monitoring_metrics_info",
"get_monitoring_metrics_data",
"get_realtime_memory_stats",
"get_historical_memory_stats"
], ],
"expected_resources": [ "expected_resources": [
"database", "database",

View File

@@ -136,24 +136,7 @@ class TestToolsClientServer:
result = await client.connect_and_run(test_callback) result = await client.connect_and_run(test_callback)
assert "success" in result assert "success" in result
@pytest.mark.asyncio
async def test_call_tool_performance_stats_via_client(self, client, test_config):
"""Test calling performance_stats tool through client"""
if not test_config.is_performance_tests_enabled():
pytest.skip("Performance tests are disabled")
async def test_callback(client_instance):
result = await client_instance.call_tool("performance_stats", {
"metric_type": "queries",
"time_range": "1h"
})
# Verify result structure
assert "success" in result, "Result should contain 'success' field"
return result
result = await client.connect_and_run(test_callback)
assert "success" in result
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_tool_error_handling_via_client(self, client, test_config): async def test_tool_error_handling_via_client(self, client, test_config):

View File

@@ -36,11 +36,6 @@ class TestDorisToolsManager:
from doris_mcp_server.utils.config import DatabaseConfig, SecurityConfig from doris_mcp_server.utils.config import DatabaseConfig, SecurityConfig
config = Mock(spec=DorisConfig) config = Mock(spec=DorisConfig)
config.doris_host = "localhost"
config.doris_port = 9030
config.doris_user = "test_user"
config.doris_password = "test_password"
config.doris_database = "test_db"
# Add database config # Add database config
config.database = Mock(spec=DatabaseConfig) config.database = Mock(spec=DatabaseConfig)
@@ -235,62 +230,7 @@ class TestDorisToolsManager:
elif "result" in result_data: elif "result" in result_data:
assert len(result_data["result"]) >= 0 # May be empty if no catalogs assert len(result_data["result"]) >= 0 # May be empty if no catalogs
@pytest.mark.asyncio
async def test_column_analysis_tool(self, tools_manager):
"""Test column_analysis tool"""
with patch.object(tools_manager.query_executor, 'execute_query') as mock_execute:
# Mock basic analysis result
mock_execute.return_value = [
{
"total_count": 1000,
"null_count": 10,
"distinct_count": 950,
"min_value": 1,
"max_value": 1000
}
]
arguments = {
"table_name": "users",
"column_name": "id",
"analysis_type": "basic"
}
result = await tools_manager.call_tool("column_analysis", arguments)
result_data = json.loads(result) if isinstance(result, str) else result
# Check if result has analysis field or result field
if "analysis" in result_data:
assert result_data["analysis"]["total_count"] == 1000
elif "result" in result_data:
assert "result" in result_data # Just check result exists
@pytest.mark.asyncio
async def test_performance_stats_tool(self, tools_manager):
"""Test performance_stats tool"""
with patch.object(tools_manager.query_executor, 'execute_query') as mock_execute:
mock_execute.return_value = [
{
"query_count": 1500,
"avg_execution_time": 0.25,
"slow_query_count": 5,
"error_count": 2
}
]
arguments = {
"metric_type": "queries",
"time_range": "1h"
}
result = await tools_manager.call_tool("performance_stats", arguments)
result_data = json.loads(result) if isinstance(result, str) else result
# Check if result has stats field or result field
if "stats" in result_data:
assert result_data["stats"]["query_count"] == 1500
elif "result" in result_data:
assert "result" in result_data # Just check result exists
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_invalid_tool_name(self, tools_manager): async def test_invalid_tool_name(self, tools_manager):

View File

@@ -35,11 +35,6 @@ class TestDorisQueryExecutor:
from doris_mcp_server.utils.config import DatabaseConfig, SecurityConfig from doris_mcp_server.utils.config import DatabaseConfig, SecurityConfig
config = Mock(spec=DorisConfig) config = Mock(spec=DorisConfig)
config.doris_host = "localhost"
config.doris_port = 9030
config.doris_user = "test_user"
config.doris_password = "test_password"
config.doris_database = "test_db"
# Add database config # Add database config
config.database = Mock(spec=DatabaseConfig) config.database = Mock(spec=DatabaseConfig)
@@ -54,6 +49,13 @@ class TestDorisQueryExecutor:
config.database.connection_timeout = 30 config.database.connection_timeout = 30
config.database.max_connection_age = 3600 config.database.max_connection_age = 3600
# Add security config
config.security = Mock(spec=SecurityConfig)
config.security.enable_masking = True
config.security.auth_type = "token"
config.security.token_secret = "test_secret"
config.security.token_expiry = 3600
return config return config
@pytest.fixture @pytest.fixture

344
uv.lock generated
View File

@@ -1,19 +1,3 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
version = 1 version = 1
revision = 1 revision = 1
requires-python = ">=3.12" requires-python = ">=3.12"
@@ -534,170 +518,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 },
] ]
[[package]]
name = "doris-mcp-server"
version = "0.3.0"
source = { editable = "." }
dependencies = [
{ name = "aiofiles" },
{ name = "aiohttp" },
{ name = "aiomysql" },
{ name = "aioredis" },
{ name = "asyncio-mqtt" },
{ name = "bcrypt" },
{ name = "click" },
{ name = "cryptography" },
{ name = "fastapi" },
{ name = "httpx" },
{ name = "mcp" },
{ name = "numpy" },
{ name = "orjson" },
{ name = "pandas" },
{ name = "passlib", extra = ["bcrypt"] },
{ name = "prometheus-client" },
{ name = "pydantic" },
{ name = "pydantic-settings" },
{ name = "pyjwt" },
{ name = "pymysql" },
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "pytest-cov" },
{ name = "python-dateutil" },
{ name = "python-dotenv" },
{ name = "python-jose", extra = ["cryptography"] },
{ name = "python-multipart" },
{ name = "pyyaml" },
{ name = "requests" },
{ name = "rich" },
{ name = "sqlparse" },
{ name = "starlette" },
{ name = "structlog" },
{ name = "toml" },
{ name = "tqdm" },
{ name = "typer" },
{ name = "uvicorn", extra = ["standard"] },
{ name = "websockets" },
]
[package.optional-dependencies]
dev = [
{ name = "bandit" },
{ name = "black" },
{ name = "flake8" },
{ name = "isort" },
{ name = "mypy" },
{ name = "myst-parser" },
{ name = "pre-commit" },
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "pytest-cov" },
{ name = "pytest-mock" },
{ name = "pytest-xdist" },
{ name = "ruff" },
{ name = "safety" },
{ name = "sphinx" },
{ name = "sphinx-rtd-theme" },
{ name = "tox" },
]
docs = [
{ name = "myst-parser" },
{ name = "sphinx" },
{ name = "sphinx-autoapi" },
{ name = "sphinx-rtd-theme" },
]
monitoring = [
{ name = "grafana-client" },
{ name = "jaeger-client" },
{ name = "opentelemetry-api" },
{ name = "opentelemetry-sdk" },
{ name = "prometheus-client" },
]
performance = [
{ name = "cchardet" },
{ name = "orjson" },
{ name = "uvloop" },
]
[package.dev-dependencies]
dev = [
{ name = "ruff" },
]
[package.metadata]
requires-dist = [
{ name = "aiofiles", specifier = ">=23.0.0" },
{ name = "aiohttp", specifier = ">=3.9.0" },
{ name = "aiomysql", specifier = ">=0.2.0" },
{ name = "aioredis", specifier = ">=2.0.0" },
{ name = "asyncio-mqtt", specifier = ">=0.16.0" },
{ name = "bandit", marker = "extra == 'dev'", specifier = ">=1.7.0" },
{ name = "bcrypt", specifier = ">=4.1.0" },
{ name = "black", marker = "extra == 'dev'", specifier = ">=23.12.0" },
{ name = "cchardet", marker = "extra == 'performance'", specifier = ">=2.1.0" },
{ name = "click", specifier = ">=8.1.0" },
{ name = "cryptography", specifier = ">=41.0.0" },
{ name = "fastapi", specifier = ">=0.108.0" },
{ name = "flake8", marker = "extra == 'dev'", specifier = ">=7.0.0" },
{ name = "grafana-client", marker = "extra == 'monitoring'", specifier = ">=3.5.0" },
{ name = "httpx", specifier = ">=0.26.0" },
{ name = "isort", marker = "extra == 'dev'", specifier = ">=5.13.0" },
{ name = "jaeger-client", marker = "extra == 'monitoring'", specifier = ">=4.8.0" },
{ name = "mcp", specifier = ">=1.0.0" },
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.8.0" },
{ name = "myst-parser", marker = "extra == 'dev'", specifier = ">=2.0.0" },
{ name = "myst-parser", marker = "extra == 'docs'", specifier = ">=2.0.0" },
{ name = "numpy", specifier = ">=1.24.0" },
{ name = "opentelemetry-api", marker = "extra == 'monitoring'", specifier = ">=1.21.0" },
{ name = "opentelemetry-sdk", marker = "extra == 'monitoring'", specifier = ">=1.21.0" },
{ name = "orjson", specifier = ">=3.9.0" },
{ name = "orjson", marker = "extra == 'performance'", specifier = ">=3.9.0" },
{ name = "pandas", specifier = ">=2.0.0" },
{ name = "passlib", extras = ["bcrypt"], specifier = ">=1.7.0" },
{ name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.6.0" },
{ name = "prometheus-client", specifier = ">=0.19.0" },
{ name = "prometheus-client", marker = "extra == 'monitoring'", specifier = ">=0.19.0" },
{ name = "pydantic", specifier = ">=2.5.0" },
{ name = "pydantic-settings", specifier = ">=2.1.0" },
{ name = "pyjwt", specifier = ">=2.8.0" },
{ name = "pymysql", specifier = ">=1.1.0" },
{ name = "pytest", specifier = ">=8.4.0" },
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=7.4.0" },
{ name = "pytest-asyncio", specifier = ">=1.0.0" },
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" },
{ name = "pytest-cov", specifier = ">=6.1.1" },
{ name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.1.0" },
{ name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.12.0" },
{ name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.5.0" },
{ name = "python-dateutil", specifier = ">=2.8.0" },
{ name = "python-dotenv", specifier = ">=1.0.0" },
{ name = "python-jose", extras = ["cryptography"], specifier = ">=3.3.0" },
{ name = "python-multipart", specifier = ">=0.0.6" },
{ name = "pyyaml", specifier = ">=6.0.0" },
{ name = "requests", specifier = ">=2.31.0" },
{ name = "rich", specifier = ">=13.7.0" },
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" },
{ name = "safety", marker = "extra == 'dev'", specifier = ">=2.3.0" },
{ name = "sphinx", marker = "extra == 'dev'", specifier = ">=7.2.0" },
{ name = "sphinx", marker = "extra == 'docs'", specifier = ">=7.2.0" },
{ name = "sphinx-autoapi", marker = "extra == 'docs'", specifier = ">=3.0.0" },
{ name = "sphinx-rtd-theme", marker = "extra == 'dev'", specifier = ">=2.0.0" },
{ name = "sphinx-rtd-theme", marker = "extra == 'docs'", specifier = ">=2.0.0" },
{ name = "sqlparse", specifier = ">=0.4.4" },
{ name = "starlette", specifier = ">=0.27.0" },
{ name = "structlog", specifier = ">=23.2.0" },
{ name = "toml", specifier = ">=0.10.0" },
{ name = "tox", marker = "extra == 'dev'", specifier = ">=4.11.0" },
{ name = "tqdm", specifier = ">=4.66.0" },
{ name = "typer", specifier = ">=0.9.0" },
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.25.0" },
{ name = "uvloop", marker = "extra == 'performance'", specifier = ">=0.19.0" },
{ name = "websockets", specifier = ">=12.0" },
]
provides-extras = ["dev", "docs", "performance", "monitoring"]
[package.metadata.requires-dev]
dev = [{ name = "ruff", specifier = ">=0.11.13" }]
[[package]] [[package]]
name = "dparse" name = "dparse"
version = "0.6.4" version = "0.6.4"
@@ -1126,6 +946,170 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/79/45/823ad05504bea55cb0feb7470387f151252127ad5c72f8882e8fe6cf5c0e/mcp-1.9.3-py3-none-any.whl", hash = "sha256:69b0136d1ac9927402ed4cf221d4b8ff875e7132b0b06edd446448766f34f9b9", size = 131063 }, { url = "https://files.pythonhosted.org/packages/79/45/823ad05504bea55cb0feb7470387f151252127ad5c72f8882e8fe6cf5c0e/mcp-1.9.3-py3-none-any.whl", hash = "sha256:69b0136d1ac9927402ed4cf221d4b8ff875e7132b0b06edd446448766f34f9b9", size = 131063 },
] ]
[[package]]
name = "mcp-doris-server"
version = "0.4.2"
source = { editable = "." }
dependencies = [
{ name = "aiofiles" },
{ name = "aiohttp" },
{ name = "aiomysql" },
{ name = "aioredis" },
{ name = "asyncio-mqtt" },
{ name = "bcrypt" },
{ name = "click" },
{ name = "cryptography" },
{ name = "fastapi" },
{ name = "httpx" },
{ name = "mcp" },
{ name = "numpy" },
{ name = "orjson" },
{ name = "pandas" },
{ name = "passlib", extra = ["bcrypt"] },
{ name = "prometheus-client" },
{ name = "pydantic" },
{ name = "pydantic-settings" },
{ name = "pyjwt" },
{ name = "pymysql" },
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "pytest-cov" },
{ name = "python-dateutil" },
{ name = "python-dotenv" },
{ name = "python-jose", extra = ["cryptography"] },
{ name = "python-multipart" },
{ name = "pyyaml" },
{ name = "requests" },
{ name = "rich" },
{ name = "sqlparse" },
{ name = "starlette" },
{ name = "structlog" },
{ name = "toml" },
{ name = "tqdm" },
{ name = "typer" },
{ name = "uvicorn", extra = ["standard"] },
{ name = "websockets" },
]
[package.optional-dependencies]
dev = [
{ name = "bandit" },
{ name = "black" },
{ name = "flake8" },
{ name = "isort" },
{ name = "mypy" },
{ name = "myst-parser" },
{ name = "pre-commit" },
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "pytest-cov" },
{ name = "pytest-mock" },
{ name = "pytest-xdist" },
{ name = "ruff" },
{ name = "safety" },
{ name = "sphinx" },
{ name = "sphinx-rtd-theme" },
{ name = "tox" },
]
docs = [
{ name = "myst-parser" },
{ name = "sphinx" },
{ name = "sphinx-autoapi" },
{ name = "sphinx-rtd-theme" },
]
monitoring = [
{ name = "grafana-client" },
{ name = "jaeger-client" },
{ name = "opentelemetry-api" },
{ name = "opentelemetry-sdk" },
{ name = "prometheus-client" },
]
performance = [
{ name = "cchardet" },
{ name = "orjson" },
{ name = "uvloop" },
]
[package.dev-dependencies]
dev = [
{ name = "ruff" },
]
[package.metadata]
requires-dist = [
{ name = "aiofiles", specifier = ">=23.0.0" },
{ name = "aiohttp", specifier = ">=3.9.0" },
{ name = "aiomysql", specifier = ">=0.2.0" },
{ name = "aioredis", specifier = ">=2.0.0" },
{ name = "asyncio-mqtt", specifier = ">=0.16.0" },
{ name = "bandit", marker = "extra == 'dev'", specifier = ">=1.7.0" },
{ name = "bcrypt", specifier = ">=4.1.0" },
{ name = "black", marker = "extra == 'dev'", specifier = ">=23.12.0" },
{ name = "cchardet", marker = "extra == 'performance'", specifier = ">=2.1.0" },
{ name = "click", specifier = ">=8.1.0" },
{ name = "cryptography", specifier = ">=41.0.0" },
{ name = "fastapi", specifier = ">=0.108.0" },
{ name = "flake8", marker = "extra == 'dev'", specifier = ">=7.0.0" },
{ name = "grafana-client", marker = "extra == 'monitoring'", specifier = ">=3.5.0" },
{ name = "httpx", specifier = ">=0.26.0" },
{ name = "isort", marker = "extra == 'dev'", specifier = ">=5.13.0" },
{ name = "jaeger-client", marker = "extra == 'monitoring'", specifier = ">=4.8.0" },
{ name = "mcp", specifier = ">=1.8.0,<2.0.0" },
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.8.0" },
{ name = "myst-parser", marker = "extra == 'dev'", specifier = ">=2.0.0" },
{ name = "myst-parser", marker = "extra == 'docs'", specifier = ">=2.0.0" },
{ name = "numpy", specifier = ">=1.24.0" },
{ name = "opentelemetry-api", marker = "extra == 'monitoring'", specifier = ">=1.21.0" },
{ name = "opentelemetry-sdk", marker = "extra == 'monitoring'", specifier = ">=1.21.0" },
{ name = "orjson", specifier = ">=3.9.0" },
{ name = "orjson", marker = "extra == 'performance'", specifier = ">=3.9.0" },
{ name = "pandas", specifier = ">=2.0.0" },
{ name = "passlib", extras = ["bcrypt"], specifier = ">=1.7.0" },
{ name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.6.0" },
{ name = "prometheus-client", specifier = ">=0.19.0" },
{ name = "prometheus-client", marker = "extra == 'monitoring'", specifier = ">=0.19.0" },
{ name = "pydantic", specifier = ">=2.5.0" },
{ name = "pydantic-settings", specifier = ">=2.1.0" },
{ name = "pyjwt", specifier = ">=2.8.0" },
{ name = "pymysql", specifier = ">=1.1.0" },
{ name = "pytest", specifier = ">=8.4.0" },
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=7.4.0" },
{ name = "pytest-asyncio", specifier = ">=1.0.0" },
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" },
{ name = "pytest-cov", specifier = ">=6.1.1" },
{ name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.1.0" },
{ name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.12.0" },
{ name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.5.0" },
{ name = "python-dateutil", specifier = ">=2.8.0" },
{ name = "python-dotenv", specifier = ">=1.0.0" },
{ name = "python-jose", extras = ["cryptography"], specifier = ">=3.3.0" },
{ name = "python-multipart", specifier = ">=0.0.6" },
{ name = "pyyaml", specifier = ">=6.0.0" },
{ name = "requests", specifier = ">=2.31.0" },
{ name = "rich", specifier = ">=13.7.0" },
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" },
{ name = "safety", marker = "extra == 'dev'", specifier = ">=2.3.0" },
{ name = "sphinx", marker = "extra == 'dev'", specifier = ">=7.2.0" },
{ name = "sphinx", marker = "extra == 'docs'", specifier = ">=7.2.0" },
{ name = "sphinx-autoapi", marker = "extra == 'docs'", specifier = ">=3.0.0" },
{ name = "sphinx-rtd-theme", marker = "extra == 'dev'", specifier = ">=2.0.0" },
{ name = "sphinx-rtd-theme", marker = "extra == 'docs'", specifier = ">=2.0.0" },
{ name = "sqlparse", specifier = ">=0.4.4" },
{ name = "starlette", specifier = ">=0.27.0" },
{ name = "structlog", specifier = ">=23.2.0" },
{ name = "toml", specifier = ">=0.10.0" },
{ name = "tox", marker = "extra == 'dev'", specifier = ">=4.11.0" },
{ name = "tqdm", specifier = ">=4.66.0" },
{ name = "typer", specifier = ">=0.9.0" },
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.25.0" },
{ name = "uvloop", marker = "extra == 'performance'", specifier = ">=0.19.0" },
{ name = "websockets", specifier = ">=12.0" },
]
provides-extras = ["dev", "docs", "performance", "monitoring"]
[package.metadata.requires-dev]
dev = [{ name = "ruff", specifier = ">=0.11.13" }]
[[package]] [[package]]
name = "mdit-py-plugins" name = "mdit-py-plugins"
version = "0.4.2" version = "0.4.2"