Files
doris-mcp-server/doris_mcp_server/tools/prompts_manager.py
2025-06-08 18:44:40 +08:00

456 lines
16 KiB
Python

"""
Apache Doris MCP Prompts Manager
Provides standardized management of query templates and intelligent prompts
"""
from datetime import datetime
from typing import Any
from mcp.types import (
GetPromptResult,
Prompt,
PromptArgument,
PromptMessage,
TextContent,
)
from ..utils.db import DorisConnectionManager
class PromptTemplate:
"""Prompt template"""
def __init__(
self,
name: str,
description: str,
template: str,
arguments: list[PromptArgument] = None,
category: str = "general",
):
self.name = name
self.description = description
self.template = template
self.arguments = arguments or []
self.category = category
self.created_at = datetime.now()
def render(self, arguments: dict[str, Any]) -> str:
"""Render template content"""
content = self.template
for key, value in arguments.items():
placeholder = f"{{{key}}}"
content = content.replace(placeholder, str(value))
return content
class DorisPromptsManager:
"""Apache Doris Prompts Manager"""
def __init__(self, connection_manager: DorisConnectionManager):
self.connection_manager = connection_manager
self.templates = self._init_prompt_templates()
def _init_prompt_templates(self) -> dict[str, PromptTemplate]:
"""Initialize prompt templates"""
templates = {}
# Sales data analysis template
templates["sales_analysis"] = PromptTemplate(
name="sales_analysis",
description="Sales data analysis query template for generating sales statistics and trend analysis queries",
template="""Please help me analyze sales data with the following requirements:
Analysis time range: {date_range}
{product_filter}
{region_filter}
Please generate SQL queries to analyze the following dimensions:
1. Total sales amount and order quantity
2. Sales trends by time dimension
3. Top-selling product rankings
4. Sales personnel performance statistics
Data table structure reference:
- Order table: Contains order ID, customer ID, salesperson ID, order amount, order time and other fields
- Product table: Contains product ID, product name, product category, price and other fields
- Customer table: Contains customer ID, customer name, region and other fields
Please ensure query results are easy to understand and analyze.""",
arguments=[
PromptArgument(
name="date_range",
description="Date range for analysis, such as 'Q1 2024' or 'last 30 days'",
required=True,
),
PromptArgument(
name="product_category",
description="Product category filter condition, such as 'electronics'",
required=False,
),
PromptArgument(
name="region",
description="Sales region filter condition, such as 'East China'",
required=False,
),
],
category="business_analysis",
)
# User behavior analysis template
templates["user_behavior_analysis"] = PromptTemplate(
name="user_behavior_analysis",
description="User behavior analysis query template for analyzing user activity patterns and preferences",
template="""Please help me analyze user behavior data, analysis objectives:
User segment: {user_segment}
{behavior_filter}
Analysis period: {time_period}
Please generate SQL queries to analyze the following aspects:
1. User activity statistics (DAU, MAU)
2. User behavior path analysis
3. Feature usage preference statistics
4. User retention rate analysis
Data table structure reference:
- User table: Contains user ID, registration time, user type, region and other fields
- Behavior log table: Contains user ID, behavior type, behavior time, page path and other fields
- Session table: Contains session ID, user ID, session start time, session duration and other fields
Please provide easy-to-understand statistical results and visualization suggestions.""",
arguments=[
PromptArgument(
name="user_segment",
description="User segment conditions, such as 'new users', 'active users'",
required=True,
),
PromptArgument(
name="behavior_type",
description="Behavior type filter, such as 'login', 'purchase', 'browse'",
required=False,
),
PromptArgument(
name="time_period",
description="Analysis time period, such as 'last 7 days', 'this month'",
required=False,
),
],
category="user_analysis",
)
# Performance optimization analysis template
templates["performance_optimization"] = PromptTemplate(
name="performance_optimization",
description="Database performance optimization analysis template for identifying performance bottlenecks and optimization opportunities",
template="""Please help me with database performance analysis and optimization recommendations:
Focus area: {focus_area}
{table_scope}
Performance metrics: {metrics}
Please generate SQL queries to analyze the following content:
1. Table and query performance statistics
2. Index usage efficiency analysis
3. Slow query identification and analysis
4. Storage space usage
Analysis objectives:
- Identify performance bottlenecks
- Provide optimization recommendations
- Evaluate optimization effects
Please provide specific optimization recommendations and implementation steps.""",
arguments=[
PromptArgument(
name="focus_area",
description="Performance area of focus, such as 'query performance', 'storage optimization'",
required=True,
),
PromptArgument(
name="table_name",
description="Specific table name (optional), if analyzing specific table performance",
required=False,
),
PromptArgument(
name="metrics",
description="Performance metrics of interest, such as 'response time', 'throughput'",
required=False,
),
],
category="performance",
)
# Data quality check template
templates["data_quality_check"] = PromptTemplate(
name="data_quality_check",
description="Data quality check template for detecting data integrity and consistency issues",
template="""Please help me perform data quality checks:
Check target: {target_table}
{quality_dimensions}
Check level: {check_level}
Please generate SQL queries to check the following data quality issues:
1. Data integrity (null values, duplicate values)
2. Data consistency (format, range)
3. Data accuracy (business rule validation)
4. Data timeliness (update frequency)
Check items:
- Required field null value checks
- Primary key and unique constraint validation
- Data format and type checks
- Business logic consistency validation
- Data distribution anomaly detection
Please provide detailed problem reports and fix recommendations.""",
arguments=[
PromptArgument(
name="target_table", description="Target table name to check", required=True
),
PromptArgument(
name="quality_dimensions",
description="Quality check dimensions, such as 'integrity', 'consistency', 'accuracy'",
required=False,
),
PromptArgument(
name="check_level",
description="Check level, such as 'basic check', 'deep check'",
required=False,
),
],
category="data_quality",
)
# Report generation template
templates["report_generation"] = PromptTemplate(
name="report_generation",
description="Business report generation template for creating standardized business reports",
template="""Please help me generate business reports:
Report type: {report_type}
Report period: {report_period}
{business_scope}
Please generate SQL queries to build the following report content:
1. Key business indicator summary
2. Trend analysis and year-over-year/month-over-month comparison
3. Anomaly data identification and explanation
4. Business insights and recommendations
Report requirements:
- Data accuracy and timeliness
- Clear hierarchical structure
- Easy-to-understand data presentation
- Decision-supporting analytical perspective
Please provide complete report structure and data acquisition logic.""",
arguments=[
PromptArgument(
name="report_type",
description="Report type, such as 'sales report', 'operations report', 'financial report'",
required=True,
),
PromptArgument(
name="report_period",
description="Report period, such as 'daily report', 'weekly report', 'monthly report'",
required=True,
),
PromptArgument(
name="business_unit",
description="Business unit scope, such as 'East China region', 'Product line A'",
required=False,
),
],
category="reporting",
)
# Real-time monitoring template
templates["real_time_monitoring"] = PromptTemplate(
name="real_time_monitoring",
description="Real-time monitoring query template for building real-time data monitoring and alerting",
template="""Please help me design real-time monitoring queries:
Monitoring target: {monitoring_target}
Alert threshold: {alert_threshold}
Monitoring frequency: {monitoring_frequency}
Please generate SQL queries to implement the following monitoring functions:
1. Real-time statistics of key indicators
2. Anomaly detection and alerting
3. Trend change monitoring
4. System health status checks
Monitoring dimensions:
- Business indicator monitoring (transaction volume, user activity, etc.)
- Technical indicator monitoring (performance, error rate, etc.)
- Data quality monitoring (integrity, consistency, etc.)
Please provide complete monitoring solution and implementation recommendations.""",
arguments=[
PromptArgument(
name="monitoring_target",
description="Monitoring target, such as 'transaction system', 'user activity'",
required=True,
),
PromptArgument(
name="alert_threshold",
description="Alert threshold setting, such as 'error rate > 5%'",
required=False,
),
PromptArgument(
name="monitoring_frequency",
description="Monitoring frequency, such as 'real-time', 'every minute', 'every 5 minutes'",
required=False,
),
],
category="monitoring",
)
return templates
async def list_prompts(self) -> list[Prompt]:
"""List all available prompt templates"""
prompts = []
for template in self.templates.values():
prompt = Prompt(
name=template.name,
description=template.description,
arguments=template.arguments,
)
prompts.append(prompt)
return prompts
async def get_prompt(self, name: str, arguments: dict[str, Any]) -> GetPromptResult:
"""Get content of specific prompt template"""
if name not in self.templates:
raise ValueError(f"Prompt template named '{name}' not found")
template = self.templates[name]
# Process optional arguments
processed_args = await self._process_arguments(template, arguments)
# Render template content
rendered_content = template.render(processed_args)
# Add database context information
context_info = await self._get_database_context()
full_content = f"""{rendered_content}
Database context information:
{context_info}
Please generate accurate and efficient SQL queries based on the above requirements and database structure."""
return GetPromptResult(
description=template.description,
messages=[
PromptMessage(
role="user", content=TextContent(type="text", text=full_content)
)
],
)
async def _process_arguments(
self, template: PromptTemplate, arguments: dict[str, Any]
) -> dict[str, Any]:
"""Process template arguments"""
processed = {}
for arg in template.arguments:
if arg.name in arguments:
processed[arg.name] = arguments[arg.name]
elif arg.required:
raise ValueError(f"Missing required parameter: {arg.name}")
else:
# Provide default handling for optional parameters
processed[arg.name] = self._get_default_argument_text(arg.name)
return processed
def _get_default_argument_text(self, arg_name: str) -> str:
"""Get default text for optional parameters"""
defaults = {
"product_category": "",
"region": "",
"behavior_type": "",
"time_period": "No time range restriction",
"table_name": "",
"metrics": "All performance metrics",
"quality_dimensions": "All quality dimensions",
"check_level": "Standard check",
"business_unit": "Full business scope",
"alert_threshold": "Use default threshold",
"monitoring_frequency": "Real-time monitoring",
}
return defaults.get(arg_name, "")
async def _get_database_context(self) -> str:
"""Get database context information"""
try:
connection = await self.connection_manager.get_connection("system")
# Get basic database information
db_info_sql = """
SELECT
COUNT(*) as table_count,
SUM(table_rows) as total_rows
FROM information_schema.tables
WHERE table_schema = DATABASE()
AND table_type = 'BASE TABLE'
"""
db_result = await connection.execute(db_info_sql)
db_info = db_result.data[0] if db_result.data else {}
# Get main table list
tables_sql = """
SELECT
table_name,
table_comment,
table_rows
FROM information_schema.tables
WHERE table_schema = DATABASE()
AND table_type = 'BASE TABLE'
ORDER BY table_rows DESC
LIMIT 10
"""
tables_result = await connection.execute(tables_sql)
context = f"""Current database statistics:
- Total number of tables: {db_info.get("table_count", 0)}
- Total data rows: {db_info.get("total_rows", 0):,}
Main data tables:"""
for table in tables_result.data:
context += f"\n- {table['table_name']}"
if table.get("table_comment"):
context += f": {table['table_comment']}"
context += f" ({table.get('table_rows', 0):,} rows)"
return context
except Exception as e:
return f"Unable to get database context information: {str(e)}"
def get_templates_by_category(self, category: str) -> list[PromptTemplate]:
"""Get templates by category"""
return [
template
for template in self.templates.values()
if template.category == category
]
def get_all_categories(self) -> list[str]:
"""Get all template categories"""
categories = {template.category for template in self.templates.values()}
return sorted(categories)