[fix]Fixed five known issues, including token authentication and multi-worker operation. (#63)

* 0.6.1Version

* fix 0.5.1 schema async bug

* fix security bug

* fix security bug

* Add complete Token, JWT, OAuth authentication system

* Add complete Token, JWT, OAuth authentication system

* Add complete Token, JWT, OAuth authentication system

* Add complete Token, JWT, OAuth authentication system

* Add a controllable MCP Server DB Pool permission authentication system, connect it with the Doris permission system, and provide it to enterprise-level applications concurrently with the multi-Worker mode.

* Add Tokens Management

* change version

* fix stdio start bug

* fix stdio start bug

* fix stdio start bug
This commit is contained in:
Yijia Su
2025-11-04 14:45:38 +08:00
committed by GitHub
parent 2613912df3
commit a125a2f5f8
6 changed files with 295 additions and 88 deletions

View File

@@ -642,6 +642,16 @@ class DorisServer:
# Store auth context in scope for potential use by tools/resources # Store auth context in scope for potential use by tools/resources
scope["auth_context"] = auth_context scope["auth_context"] = auth_context
# FIX for Issue #62 Bug 1: Set auth_context in context variable
# This allows tools to access token information for token-bound database configuration
try:
from contextvars import ContextVar
auth_context_var: ContextVar = ContextVar('mcp_auth_context', default=None)
auth_context_var.set(auth_context)
self.logger.debug(f"Set auth_context in context variable with token: {bool(hasattr(auth_context, 'token') and auth_context.token)}")
except Exception as ctx_error:
self.logger.warning(f"Failed to set auth_context in context variable: {ctx_error}")
except Exception as auth_error: except Exception as auth_error:
self.logger.error(f"MCP authentication failed: {auth_error}") self.logger.error(f"MCP authentication failed: {auth_error}")
# Return 401 Unauthorized # Return 401 Unauthorized

View File

@@ -95,12 +95,14 @@ class DorisConnection:
await cursor.execute(sql, params) await cursor.execute(sql, params)
# Check if it's a query statement (statement that returns result set) # Check if it's a query statement (statement that returns result set)
# FIX for Issue #62 Bug 5: Added WITH support for Common Table Expressions (CTE)
sql_upper = sql.strip().upper() sql_upper = sql.strip().upper()
if (sql_upper.startswith("SELECT") or if (sql_upper.startswith("SELECT") or
sql_upper.startswith("SHOW") or sql_upper.startswith("SHOW") or
sql_upper.startswith("DESCRIBE") or sql_upper.startswith("DESCRIBE") or
sql_upper.startswith("DESC") or sql_upper.startswith("DESC") or
sql_upper.startswith("EXPLAIN")): sql_upper.startswith("EXPLAIN") or
sql_upper.startswith("WITH")): # FIX: Support CTE queries
data = await cursor.fetchall() data = await cursor.fetchall()
row_count = len(data) row_count = len(data)
else: else:
@@ -250,7 +252,16 @@ class DorisConnectionManager:
self.logger = get_logger(__name__) self.logger = get_logger(__name__)
self.security_manager = security_manager self.security_manager = security_manager
self.token_manager = token_manager # Token manager for token-bound DB config self.token_manager = token_manager # Token manager for token-bound DB config
self.session_cache = DorisSessionCache(self)
# FIX for Issue #58 Problem 1: Disable session caching to prevent connection sharing
# Session caching causes multiple threads to share the same MySQL connection,
# leading to race conditions and deadlocks in multi-threaded environments
# By disabling caching, each request gets a fresh connection from the pool
self.session_cache = DorisSessionCache(
self,
cache_system_session=False, # Disabled to prevent multi-thread issues
cache_user_session=False # Disabled to prevent multi-thread issues
)
# Store original database config for fallback # Store original database config for fallback
self.original_db_config = { self.original_db_config = {
@@ -1258,10 +1269,36 @@ class DorisConnectionManager:
async def execute_query( async def execute_query(
self, session_id: str, sql: str, params: tuple | None = None, auth_context=None self, session_id: str, sql: str, params: tuple | None = None, auth_context=None
) -> QueryResult: ) -> QueryResult:
"""Execute query - Simplified Strategy with automatic connection management""" """Execute query - Simplified Strategy with automatic connection management
FIX for Issue #62 Bug 1: Configure token-bound database before query execution
"""
connection = None connection = None
try: try:
# Always get fresh connection from pool # FIX: Configure database for token BEFORE getting connection
# This ensures token-bound database configuration is used instead of global config
if auth_context and hasattr(auth_context, 'token') and auth_context.token:
try:
success, config_source = await self.configure_for_token(auth_context.token)
if success:
self.logger.info(f"Session {session_id}: Using {config_source} database configuration")
else:
self.logger.warning(f"Session {session_id}: Token configuration failed, may use global config")
except Exception as token_config_error:
# SECURITY: If token should have config but configuration fails, don't fallback
# This prevents privilege escalation (using high-privilege default user)
if self.token_manager:
self.logger.error(f"Session {session_id}: Token database configuration failed: {token_config_error}")
raise RuntimeError(
f"Failed to configure database for authenticated token. "
f"This is a security measure to prevent using default high-privilege credentials. "
f"Error: {token_config_error}"
)
else:
# No token manager, can use global config
self.logger.warning(f"Session {session_id}: No token manager, using global config")
# Always get fresh connection from pool (with configured database)
connection = await self.get_connection(session_id) connection = await self.get_connection(session_id)
# Execute query # Execute query

View File

@@ -546,9 +546,13 @@ class DorisQueryExecutor:
limit: int = 1000, limit: int = 1000,
timeout: int = 30, timeout: int = 30,
session_id: str = "mcp_session", session_id: str = "mcp_session",
user_id: str = "mcp_user" user_id: str = "mcp_user",
auth_context = None # FIX for Issue #62 Bug 1: Accept auth_context with token
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Execute SQL query for MCP interface - unified method""" """Execute SQL query for MCP interface - unified method
FIX for Issue #62 Bug 1: Now accepts auth_context parameter to support token-bound database configuration
"""
max_retries = 2 max_retries = 2
retry_count = 0 retry_count = 0
@@ -564,14 +568,20 @@ class DorisQueryExecutor:
# Import required security modules # Import required security modules
from .security import DorisSecurityManager, AuthContext, SecurityLevel from .security import DorisSecurityManager, AuthContext, SecurityLevel
# Create proper auth context with read-only permissions # FIX: Use provided auth_context if available (contains token for DB config)
# Otherwise create default auth context for backward compatibility
if auth_context is None:
auth_context = AuthContext( auth_context = AuthContext(
user_id=user_id, user_id=user_id,
roles=["read_only_user"], # Restrictive role for MCP interface roles=["read_only_user"], # Restrictive role for MCP interface
permissions=["read_data"], # Only read permissions permissions=["read_data"], # Only read permissions
session_id=session_id, session_id=session_id,
security_level=SecurityLevel.INTERNAL security_level=SecurityLevel.INTERNAL,
token="" # No token in default context
) )
else:
# Use provided auth_context (may contain token for database configuration)
self.logger.debug(f"Using provided auth_context with token: {bool(hasattr(auth_context, 'token') and auth_context.token)}")
# Perform SQL security validation if enabled # Perform SQL security validation if enabled
if hasattr(self.connection_manager, 'config') and hasattr(self.connection_manager.config, 'security'): if hasattr(self.connection_manager, 'config') and hasattr(self.connection_manager.config, 'security'):
@@ -880,17 +890,20 @@ async def execute_sql_query(sql: str, connection_manager: DorisConnectionManager
This function now includes security validation to ensure safe query execution. This function now includes security validation to ensure safe query execution.
All queries are validated against the configured security policies before execution. All queries are validated against the configured security policies before execution.
FIX for Issue #62 Bug 1: Now supports auth_context parameter for token-bound database configuration
FIX for Issue #58 Problem 2: Removed executor.close() to prevent ClosedResourceError in multi-worker mode
""" """
try: try:
# Create query executor with the connection manager's configuration # Create query executor with the connection manager's configuration
executor = DorisQueryExecutor(connection_manager) executor = DorisQueryExecutor(connection_manager)
try:
# Extract parameters from kwargs or use defaults # Extract parameters from kwargs or use defaults
limit = kwargs.get("limit", 1000) limit = kwargs.get("limit", 1000)
timeout = kwargs.get("timeout", 30) timeout = kwargs.get("timeout", 30)
session_id = kwargs.get("session_id", "mcp_session") session_id = kwargs.get("session_id", "mcp_session")
user_id = kwargs.get("user_id", "mcp_user") user_id = kwargs.get("user_id", "mcp_user")
auth_context = kwargs.get("auth_context", None) # FIX: Extract auth_context
# The execute_sql_for_mcp method now includes security validation # The execute_sql_for_mcp method now includes security validation
result = await executor.execute_sql_for_mcp( result = await executor.execute_sql_for_mcp(
@@ -898,11 +911,17 @@ async def execute_sql_query(sql: str, connection_manager: DorisConnectionManager
limit=limit, limit=limit,
timeout=timeout, timeout=timeout,
session_id=session_id, session_id=session_id,
user_id=user_id user_id=user_id,
auth_context=auth_context # FIX: Pass auth_context with token
) )
# FIX for Issue #58 Problem 2: Do NOT close executor here
# In multi-worker mode, closing here causes ClosedResourceError
# The executor's resources (cache, background tasks) will be managed
# by the connection_manager lifecycle and Python's garbage collection
# This prevents premature cleanup while MCP session manager is still processing
return result return result
finally:
await executor.close()
except Exception as e: except Exception as e:
return { return {

View File

@@ -1464,6 +1464,9 @@ class MetadataExtractor:
""" """
Execute SQL query and return results, supports catalog federation queries Execute SQL query and return results, supports catalog federation queries
Unified interface for MCP tools Unified interface for MCP tools
FIX for Issue #62 Bug 1: Now retrieves auth_context from context variable to support token-bound database configuration
FIX for Issue #62 Bug 3: Now uses db_name and catalog_name parameters to switch database context
""" """
logger.info(f"Executing SQL query: {sql}, DB: {db_name}, Catalog: {catalog_name}, MaxRows: {max_rows}, Timeout: {timeout}") logger.info(f"Executing SQL query: {sql}, DB: {db_name}, Catalog: {catalog_name}, MaxRows: {max_rows}, Timeout: {timeout}")
@@ -1471,15 +1474,63 @@ class MetadataExtractor:
if not sql: if not sql:
return self._format_response(success=False, error="No SQL statement provided", message="Please provide SQL statement to execute") return self._format_response(success=False, error="No SQL statement provided", message="Please provide SQL statement to execute")
# FIX for Issue #62 Bug 3: Build context switching SQL if db_name or catalog_name is specified
final_sql = sql
if catalog_name or db_name:
context_statements = []
if catalog_name:
# Switch to specified catalog
context_statements.append(f"USE CATALOG `{catalog_name}`")
logger.debug(f"Switching to catalog: {catalog_name}")
if db_name:
# Switch to specified database
if catalog_name:
context_statements.append(f"USE `{catalog_name}`.`{db_name}`")
else:
context_statements.append(f"USE `{db_name}`")
logger.debug(f"Switching to database: {db_name}")
# Combine context switching with original SQL
if context_statements:
# Remove trailing semicolon from context statements if present
context_sql = "; ".join(context_statements)
# Ensure original SQL doesn't start with semicolon
sql_clean = sql.lstrip(";").strip()
final_sql = f"{context_sql}; {sql_clean}"
logger.debug(f"Modified SQL with context switching: {final_sql[:200]}...")
# FIX: Try to get auth_context from context variable (set by HTTP middleware)
# This allows token-bound database configuration to work
auth_context = None
try:
from contextvars import ContextVar
from .security import AuthContext
# Try to get auth_context from context variable
# This will be set by the HTTP request handler in main.py
auth_context_var: ContextVar = ContextVar('mcp_auth_context', default=None)
auth_context = auth_context_var.get()
if auth_context:
logger.debug(f"Retrieved auth_context from context variable with token: {bool(hasattr(auth_context, 'token') and auth_context.token)}")
else:
logger.debug("No auth_context found in context variable, using default")
except Exception as ctx_error:
logger.debug(f"Could not retrieve auth_context from context variable: {ctx_error}")
auth_context = None
# Import query executor # Import query executor
from .query_executor import execute_sql_query from .query_executor import execute_sql_query
# Call execute_sql_query to execute query # Call execute_sql_query to execute query with auth_context
exec_result = await execute_sql_query( exec_result = await execute_sql_query(
sql=sql, sql=final_sql, # Use modified SQL with context switching
connection_manager=self.connection_manager, connection_manager=self.connection_manager,
limit=max_rows, limit=max_rows,
timeout=timeout timeout=timeout,
auth_context=auth_context # FIX: Pass auth_context with token
) )
return exec_result return exec_result

View File

@@ -939,28 +939,69 @@ class SQLSecurityValidator:
async def _check_sql_injection( async def _check_sql_injection(
self, sql: str, parsed: Statement self, sql: str, parsed: Statement
) -> ValidationResult: ) -> ValidationResult:
"""Check SQL injection risks""" """Check SQL injection risks with improved pattern detection
# Check common SQL injection patterns
FIX for Issue #62 Bug 2: Improved patterns to reduce false positives
Now better distinguishes between legitimate SQL (like BETWEEN...AND) and injection attempts
"""
# Improved injection patterns that are more specific and less prone to false positives
injection_patterns = [ injection_patterns = [
r"(?i)(?<![A-Za-z0-9_])(union|select|insert|update|delete|drop|create|alter)(?![A-Za-z0-9_])\s+[\s\S]*?\s+(?<![A-Za-z0-9_])(union|select|insert|update|delete|drop|create|alter)(?![A-Za-z0-9_])", # Stacked queries with dangerous operations (true injection risk)
r"(\s|^)(or|and)\s+\d+\s*=\s*\d+", r";\s*(DROP|DELETE|TRUNCATE|ALTER|CREATE|INSERT|UPDATE)\s+",
r"(\s|^)(or|and)\s+['\"].*['\"]",
r";\s*(drop|delete|truncate|alter|create)", # UNION-based injection (but allow legitimate UNION queries)
r"(exec|execute|sp_|xp_)", # Only flag if UNION is followed by suspicious patterns like SELECT with WHERE 1=1
r"(script|javascript|vbscript)", r"UNION\s+(ALL\s+)?SELECT\s+.*\s+(WHERE|AND|OR)\s+\d+\s*=\s*\d+",
r"(char|ascii|substring|concat)\s*\(",
# Boolean-based blind injection with comments (true injection pattern)
r"(WHERE|AND|OR)\s+\d+\s*=\s*\d+\s*(--|#|/\*)",
# Quote-based injection attempts (but not in legitimate strings)
r"(WHERE|AND|OR)\s+(['\"])[^\2]*\2\s*=\s*\2[^\2]*\2",
# Time-based blind injection
r"(SLEEP|WAITFOR|BENCHMARK)\s*\(",
# System stored procedure injection
r"(EXEC|EXECUTE|SP_|XP_)\s*\(",
# Script injection attempts
r"<\s*(SCRIPT|JAVASCRIPT|VBSCRIPT)",
] ]
sql_lower = sql.lower() # FIX: Don't flag legitimate SQL functions and keywords
# These patterns are too broad and cause false positives:
# - REMOVED: r"(char|ascii|substring|concat)\s*\(" - These are legitimate SQL functions
# - REMOVED: r"(\s|^)(or|and)\s+\d+\s*=\s*\d+" - This flags BETWEEN...AND constructs
# - REMOVED: r"(\s|^)(or|and)\s+['\"].*['\"]" - This is too broad
sql_upper = sql.upper()
# Special case: Allow BETWEEN...AND which is legitimate SQL
# This prevents false positives like "WHERE dt BETWEEN '2025-01-01' AND '2025-01-31'"
if "BETWEEN" in sql_upper and "AND" in sql_upper:
# This is likely a BETWEEN clause, not injection
# Check if AND appears in a BETWEEN context
between_pattern = r"BETWEEN\s+[^\s]+\s+AND\s+[^\s]+"
if re.search(between_pattern, sql_upper, re.IGNORECASE):
# Remove BETWEEN clauses before checking other patterns
sql_cleaned = re.sub(between_pattern, "BETWEEN_CLAUSE", sql_upper, flags=re.IGNORECASE)
sql_to_check = sql_cleaned
else:
sql_to_check = sql_upper
else:
sql_to_check = sql_upper
for pattern in injection_patterns: for pattern in injection_patterns:
if re.search(pattern, sql_lower, re.IGNORECASE): if re.search(pattern, sql_to_check, re.IGNORECASE):
self.logger.warning(f"Potential SQL injection pattern detected: {pattern}")
return ValidationResult( return ValidationResult(
is_valid=False, is_valid=False,
error_message="Potential SQL injection risk detected", error_message="Potential SQL injection risk detected",
risk_level="high", risk_level="high",
) )
# Check suspicious quotes and comments # Check suspicious quotes and comments (with improved detection)
if self._has_suspicious_quotes_or_comments(sql): if self._has_suspicious_quotes_or_comments(sql):
return ValidationResult( return ValidationResult(
is_valid=False, is_valid=False,
@@ -971,20 +1012,68 @@ class SQLSecurityValidator:
return ValidationResult(is_valid=True) return ValidationResult(is_valid=True)
def _has_suspicious_quotes_or_comments(self, sql: str) -> bool: def _has_suspicious_quotes_or_comments(self, sql: str) -> bool:
"""Check suspicious quote and comment patterns""" """Check suspicious quote and comment patterns with improved detection
# Check unmatched quotes
single_quotes = sql.count("'")
double_quotes = sql.count('"')
FIX for Issue #62 Bug 2: Improved detection to reduce false positives
Now distinguishes between legitimate comments/strings and injection attempts
"""
try:
# Use sqlparse to parse the SQL and distinguish between code and comments/strings
import sqlparse
from sqlparse.tokens import Comment, String
# Parse the SQL
parsed = sqlparse.parse(sql)
if not parsed:
# If parsing fails, be conservative
return True
statement = parsed[0]
# Check for unmatched quotes ONLY in non-string tokens
# This prevents false positives from legitimate string content
non_string_content = []
has_string_tokens = False
for token in statement.flatten():
if token.ttype in (String.Single, String.Double):
has_string_tokens = True
# Skip string content - quotes inside strings are legitimate
continue
elif token.ttype in (Comment.Single, Comment.Multi):
# Comments are generally OK, but check for suspicious injection patterns
comment_value = str(token).lower()
# Check if comment contains dangerous SQL keywords
dangerous_in_comments = ['drop', 'delete', 'insert', 'update', 'union', 'exec', 'execute']
if any(keyword in comment_value for keyword in dangerous_in_comments):
self.logger.warning(f"Suspicious SQL keyword in comment: {token}")
return True
# Normal comments are OK
continue
else:
# Accumulate non-string, non-comment content
non_string_content.append(str(token))
# Check for unmatched quotes in non-string content
non_string_text = ''.join(non_string_content)
single_quotes = non_string_text.count("'")
double_quotes = non_string_text.count('"')
# Only flag if there are unmatched quotes in actual SQL code (not in strings)
if single_quotes % 2 != 0 or double_quotes % 2 != 0: if single_quotes % 2 != 0 or double_quotes % 2 != 0:
return True return True
# Check SQL comments # FIX: Don't flag legitimate SQL comments
if "--" in sql or "/*" in sql: # Comments are OK as long as they don't contain dangerous patterns (already checked above)
return True
return False return False
except Exception as e:
self.logger.debug(f"SQL parsing error in quote/comment check: {e}")
# On parsing error, fall back to conservative check
# But be more lenient than before
return False # Don't flag on parse errors to reduce false positives
async def _check_blocked_keywords(self, parsed: Statement) -> ValidationResult: async def _check_blocked_keywords(self, parsed: Statement) -> ValidationResult:
"""Check blocked keywords""" """Check blocked keywords"""
blocked_operations = [] blocked_operations = []

View File

@@ -64,9 +64,10 @@ else
fi fi
# Set HTTP-specific environment variables # Set HTTP-specific environment variables
# FIX for Issue #62 Bug 4: Use SERVER_PORT instead of MCP_PORT for consistency with code
export MCP_TRANSPORT_TYPE="http" export MCP_TRANSPORT_TYPE="http"
export MCP_HOST="${MCP_HOST:-0.0.0.0}" export MCP_HOST="${MCP_HOST:-0.0.0.0}"
export MCP_PORT="${MCP_PORT:-3000}" export SERVER_PORT="${SERVER_PORT:-3000}" # Changed from MCP_PORT to SERVER_PORT
export WORKERS="${WORKERS:-1}" export WORKERS="${WORKERS:-1}"
export ALLOWED_ORIGINS="${ALLOWED_ORIGINS:-*}" export ALLOWED_ORIGINS="${ALLOWED_ORIGINS:-*}"
export LOG_LEVEL="${LOG_LEVEL:-info}" export LOG_LEVEL="${LOG_LEVEL:-info}"
@@ -77,15 +78,15 @@ export MCP_DEBUG_ADAPTER="true"
export PYTHONPATH="$(pwd):$PYTHONPATH" export PYTHONPATH="$(pwd):$PYTHONPATH"
echo -e "${GREEN}Starting MCP server (Streamable HTTP mode)...${NC}" echo -e "${GREEN}Starting MCP server (Streamable HTTP mode)...${NC}"
echo -e "${YELLOW}Service will run on http://${MCP_HOST}:${MCP_PORT}/mcp${NC}" echo -e "${YELLOW}Service will run on http://${MCP_HOST}:${SERVER_PORT}/mcp${NC}"
echo -e "${YELLOW}Health Check: http://${MCP_HOST}:${MCP_PORT}/health${NC}" echo -e "${YELLOW}Health Check: http://${MCP_HOST}:${SERVER_PORT}/health${NC}"
echo -e "${YELLOW}MCP Endpoint: http://${MCP_HOST}:${MCP_PORT}/mcp${NC}" echo -e "${YELLOW}MCP Endpoint: http://${MCP_HOST}:${SERVER_PORT}/mcp${NC}"
echo -e "${YELLOW}Local access: http://localhost:${MCP_PORT}/mcp${NC}" echo -e "${YELLOW}Local access: http://localhost:${SERVER_PORT}/mcp${NC}"
echo -e "${YELLOW}Workers: ${WORKERS}${NC}" echo -e "${YELLOW}Workers: ${WORKERS}${NC}"
echo -e "${YELLOW}Use Ctrl+C to stop the service${NC}" echo -e "${YELLOW}Use Ctrl+C to stop the service${NC}"
# Start the server in HTTP mode (Streamable HTTP) # Start the server in HTTP mode (Streamable HTTP)
python -m doris_mcp_server.main --transport http --host ${MCP_HOST} --port ${MCP_PORT} --workers ${WORKERS} python -m doris_mcp_server.main --transport http --host ${MCP_HOST} --port ${SERVER_PORT} --workers ${WORKERS}
# Check exit status # Check exit status
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
@@ -97,4 +98,4 @@ fi
echo -e "${YELLOW}Tip: If the page displays abnormally, please clear your browser cache or use incognito mode${NC}" echo -e "${YELLOW}Tip: If the page displays abnormally, please clear your browser cache or use incognito mode${NC}"
echo -e "${YELLOW}Chrome browser clear cache shortcut: Ctrl+Shift+Del (Windows) or Cmd+Shift+Del (Mac)${NC}" echo -e "${YELLOW}Chrome browser clear cache shortcut: Ctrl+Shift+Del (Windows) or Cmd+Shift+Del (Mac)${NC}"
echo -e "${CYAN}For testing HTTP endpoints, you can use:${NC}" echo -e "${CYAN}For testing HTTP endpoints, you can use:${NC}"
echo -e "${CYAN} curl -X POST http://localhost:${MCP_PORT}/mcp -H 'Content-Type: application/json' -d '{\"method\":\"tools/list\"}'${NC}" echo -e "${CYAN} curl -X POST http://localhost:${SERVER_PORT}/mcp -H 'Content-Type: application/json' -d '{\"method\":\"tools/list\"}'${NC}"