init
This commit is contained in:
1
vw-agentic-rag/tests/integration/__init__.py
Normal file
1
vw-agentic-rag/tests/integration/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Empty __init__.py files to make test packages
|
||||
170
vw-agentic-rag/tests/integration/test_2phase_retrieval.py
Normal file
170
vw-agentic-rag/tests/integration/test_2phase_retrieval.py
Normal file
@@ -0,0 +1,170 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test 2-phase retrieval strategy
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
import json
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
|
||||
# Logging: timestamped, level-tagged INFO messages for the smoke-test output.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
||||
|
||||
async def test_2phase_retrieval():
    """Exercise the agent's 2-phase retrieval strategy end to end.

    Streams a content-focused query to the locally running chat API and
    counts how often each retrieval tool is started:

    * Phase 1 -- ``retrieve_standard_regulation`` (metadata search)
    * Phase 2 -- ``retrieve_doc_chunk_standard_regulation`` (content search)

    The run is considered a pass when both phases fire at least once and at
    least two tools execute in total.  Results are reported via ``logger``;
    the coroutine returns ``None`` either way (this is a manual smoke test,
    not a pytest assertion suite).
    """
    session_id = f"2phase-test-{random.randint(1000000000, 9999999999)}"
    base_url = "http://127.0.0.1:8000"

    # Content-focused query (Chinese) expected to trigger both retrieval
    # phases: "How to test EV charging performance? Please describe the
    # test methods and steps in detail."
    query = "如何测试电动汽车的充电性能?请详细说明测试方法和步骤。"

    logger.info("🎯 2-PHASE RETRIEVAL TEST")
    logger.info("=" * 80)
    logger.info(f"📝 Session: {session_id}")
    logger.info(f"📝 Query: {query}")
    logger.info("-" * 60)

    payload = {
        "messages": [
            {
                "role": "user",
                "content": query,
            }
        ],
        "session_id": session_id,
    }

    # Tool-usage counters: phase 1 (metadata), phase 2 (content), and total.
    metadata_tools = 0
    content_tools = 0
    total_tools = 0

    timeout = httpx.Timeout(120.0)  # 2 minute timeout

    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            logger.info("✅ Streaming response started")

            async with client.stream(
                "POST",
                f"{base_url}/api/chat",
                json=payload,
                headers={"Content-Type": "application/json"},
            ) as response:
                # Bail out early on a non-success status.
                if response.status_code != 200:
                    error_body = await response.aread()
                    logger.error(f"❌ HTTP {response.status_code}: {error_body.decode()}")
                    return

                # SSE parsing state: the most recent "event:" line applies to
                # the next "data:" line only.
                current_event_type = None

                async for line in response.aiter_lines():
                    if not line.strip():
                        continue

                    if line.startswith("event: "):
                        current_event_type = line[len("event: "):]
                        continue

                    if not line.startswith("data: "):
                        continue
                    data_str = line[len("data: "):]

                    if data_str == "[DONE]":
                        logger.info("✅ Stream completed with [DONE]")
                        break

                    try:
                        event_data = json.loads(data_str)
                    except json.JSONDecodeError as e:
                        logger.warning(f"⚠️ Failed to parse event: {e}")
                        current_event_type = None
                        continue

                    event_type = current_event_type or "unknown"

                    if event_type == "tool_start":
                        total_tools += 1
                        tool_name = event_data.get("name", "unknown")
                        args = event_data.get("args", {})
                        # Look up the query argument once and truncate long
                        # values for readable log output (was three separate
                        # dict lookups).
                        raw_query = args.get("query", "")
                        query_arg = raw_query[:50] + "..." if len(raw_query) > 50 else raw_query

                        if tool_name == "retrieve_standard_regulation":
                            metadata_tools += 1
                            logger.info(f"📋 Phase 1 Tool {metadata_tools}: {tool_name}")
                            logger.info(f"   Query: {query_arg}")
                        elif tool_name == "retrieve_doc_chunk_standard_regulation":
                            content_tools += 1
                            logger.info(f"📄 Phase 2 Tool {content_tools}: {tool_name}")
                            logger.info(f"   Query: {query_arg}")
                        else:
                            logger.info(f"🔧 Tool {total_tools}: {tool_name}")

                    elif event_type == "tool_result":
                        tool_name = event_data.get("name", "unknown")
                        results_count = len(event_data.get("results", []))
                        took_ms = event_data.get("took_ms", 0)
                        logger.info(f"✅ Tool completed: {tool_name} ({results_count} results, {took_ms}ms)")

                    elif event_type == "tokens":
                        # Token deltas are too chatty to log individually.
                        pass

                    # Reset event type for the next event.
                    current_event_type = None

                    # BUGFIX: was `total_tools > 20`, which only broke after
                    # the 21st tool while the message claimed "after 20".
                    if total_tools >= 20:
                        logger.info("   ⚠️ Breaking after 20 tools...")
                        break

    except Exception as e:
        logger.error(f"❌ Request failed: {e}")
        return

    # Summary report.
    logger.info("=" * 80)
    logger.info("📊 2-PHASE RETRIEVAL ANALYSIS")
    logger.info("=" * 80)
    logger.info(f"Phase 1 (Metadata) tools: {metadata_tools}")
    logger.info(f"Phase 2 (Content) tools: {content_tools}")
    logger.info(f"Total tools executed: {total_tools}")
    logger.info("-" * 60)

    # Each entry: (passed?, human-readable verdict line).
    success_criteria = [
        (metadata_tools > 0, f"Phase 1 metadata retrieval: {'✅' if metadata_tools > 0 else '❌'} ({metadata_tools} tools)"),
        (content_tools > 0, f"Phase 2 content retrieval: {'✅' if content_tools > 0 else '❌'} ({content_tools} tools)"),
        (total_tools >= 2, f"Multi-tool execution: {'✅' if total_tools >= 2 else '❌'} ({total_tools} tools)"),
    ]

    logger.info("✅ SUCCESS CRITERIA:")
    for _, message in success_criteria:
        logger.info(f"  {message}")
    all_passed = all(passed for passed, _ in success_criteria)

    if all_passed:
        logger.info("🎉 2-PHASE RETRIEVAL TEST PASSED!")
        logger.info("  ✅ Agent correctly uses both metadata and content retrieval tools")
    else:
        logger.info("❌ 2-PHASE RETRIEVAL TEST FAILED!")
        if metadata_tools == 0:
            logger.info("  ❌ No metadata retrieval tools used")
        if content_tools == 0:
            logger.info("  ❌ No content retrieval tools used - this is the main issue!")
|
||||
|
||||
if __name__ == "__main__":
    # Manual entry point: run the smoke test against a locally running service.
    asyncio.run(test_2phase_retrieval())
|
||||
372
vw-agentic-rag/tests/integration/test_api.py
Normal file
372
vw-agentic-rag/tests/integration/test_api.py
Normal file
@@ -0,0 +1,372 @@
|
||||
"""
|
||||
Remote Integration Tests for Agentic RAG API
|
||||
|
||||
These tests connect to a running service instance remotely to validate:
|
||||
- API endpoints and responses
|
||||
- Request/response schemas
|
||||
- Basic functionality without external dependencies
|
||||
"""
|
||||
import pytest
|
||||
import asyncio
|
||||
import json
|
||||
import httpx
|
||||
from typing import Optional, Dict, Any
|
||||
import time
|
||||
import os
|
||||
|
||||
|
||||
# Remote service connection settings; override the default target via the
# AGENTIC_RAG_SERVICE_URL environment variable.
DEFAULT_SERVICE_URL = "http://127.0.0.1:8000"
SERVICE_URL = os.environ.get("AGENTIC_RAG_SERVICE_URL", DEFAULT_SERVICE_URL)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def service_url() -> str:
    """Session-scoped fixture returning the base URL of the service under test."""
    return SERVICE_URL
|
||||
|
||||
|
||||
class TestBasicAPI:
    """Smoke tests for the service's basic, non-chat HTTP endpoints."""

    @staticmethod
    async def _get(service_url: str, path: str) -> httpx.Response:
        # Shared GET helper: fresh client per call, 30 s timeout.
        async with httpx.AsyncClient(timeout=30.0) as client:
            return await client.get(f"{service_url}{path}")

    @pytest.mark.asyncio
    async def test_health_endpoint(self, service_url: str):
        """The /health endpoint reports a healthy agentic-rag service."""
        response = await self._get(service_url, "/health")
        assert response.status_code == 200

        body = response.json()
        assert body["status"] == "healthy"
        assert body["service"] == "agentic-rag"

    @pytest.mark.asyncio
    async def test_root_endpoint(self, service_url: str):
        """The root endpoint returns a greeting naming the API."""
        response = await self._get(service_url, "/")
        assert response.status_code == 200

        body = response.json()
        assert "message" in body
        assert "Agentic RAG API" in body["message"]

    @pytest.mark.asyncio
    async def test_openapi_docs(self, service_url: str):
        """The OpenAPI schema is served and carries the expected title."""
        response = await self._get(service_url, "/openapi.json")
        assert response.status_code == 200

        body = response.json()
        assert "openapi" in body
        assert "info" in body
        assert body["info"]["title"] == "Agentic RAG API"

    @pytest.mark.asyncio
    async def test_docs_endpoint(self, service_url: str):
        """The Swagger UI page is served as HTML."""
        response = await self._get(service_url, "/docs")
        assert response.status_code == 200
        assert "text/html" in response.headers["content-type"]
|
||||
|
||||
|
||||
class TestChatAPI:
    """Tests for the chat endpoints using well-formed requests."""

    def _create_chat_request(self, message: str, session_id: Optional[str] = None) -> Dict[str, Any]:
        """Build a minimal valid chat payload carrying a single user message."""
        return {
            "session_id": session_id or f"test_session_{int(time.time())}",
            "messages": [{"role": "user", "content": message}],
        }

    @staticmethod
    async def _post(client: httpx.AsyncClient, url: str, payload: Dict[str, Any]) -> httpx.Response:
        # Shared JSON POST helper with an explicit content-type header.
        return await client.post(url, json=payload, headers={"Content-Type": "application/json"})

    @pytest.mark.asyncio
    async def test_chat_endpoint_basic_request(self, service_url: str):
        """A basic chat request is accepted and streams back a response."""
        payload = self._create_chat_request("Hello, can you help me?")

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await self._post(client, f"{service_url}/api/chat", payload)

        assert response.status_code == 200
        # The endpoint streams either SSE or plain text.
        content_type = response.headers.get("content-type", "")
        assert "text/event-stream" in content_type or "text/plain" in content_type

    @pytest.mark.asyncio
    async def test_ai_sdk_chat_endpoint_basic_request(self, service_url: str):
        """The AI-SDK-compatible chat endpoint streams plain text."""
        payload = self._create_chat_request("What is ISO 26262?")

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await self._post(client, f"{service_url}/api/ai-sdk/chat", payload)

        assert response.status_code == 200
        assert "text/plain" in response.headers.get("content-type", "")

    @pytest.mark.asyncio
    async def test_chat_endpoint_invalid_request(self, service_url: str):
        """Malformed payloads are rejected with a 422 validation error."""
        bad_payloads = [
            {},  # Empty request
            {"session_id": "test"},  # Missing messages
            {"messages": []},  # Missing session_id
            {"session_id": "test", "messages": [{"role": "invalid"}]},  # Invalid message format
        ]

        async with httpx.AsyncClient(timeout=30.0) as client:
            for bad in bad_payloads:
                response = await self._post(client, f"{service_url}/api/chat", bad)
                assert response.status_code == 422

    @pytest.mark.asyncio
    async def test_session_persistence(self, service_url: str):
        """Two requests sharing a session_id are both accepted."""
        session_id = f"persistent_session_{int(time.time())}"

        async with httpx.AsyncClient(timeout=30.0) as client:
            first = self._create_chat_request("My name is John", session_id)
            response1 = await self._post(client, f"{service_url}/api/chat", first)
            assert response1.status_code == 200

            # Give the service a moment to store the first turn.
            await asyncio.sleep(1)

            follow_up = self._create_chat_request("What did I just tell you my name was?", session_id)
            response2 = await self._post(client, f"{service_url}/api/chat", follow_up)
            assert response2.status_code == 200
|
||||
|
||||
|
||||
class TestRequestValidation:
    """Validation and error handling for malformed or unusual requests."""

    @pytest.mark.asyncio
    async def test_malformed_json(self, service_url: str):
        """A syntactically invalid JSON body yields a 422."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{service_url}/api/chat",
                content="invalid json{",
                headers={"Content-Type": "application/json"},
            )
        assert response.status_code == 422

    @pytest.mark.asyncio
    async def test_missing_content_type(self, service_url: str):
        """A JSON body sent without a Content-Type header is rejected gracefully."""
        body = {
            "session_id": "test_session",
            "messages": [{"role": "user", "content": "test"}],
        }

        async with httpx.AsyncClient(timeout=30.0) as client:
            # Deliberately omit the Content-Type header.
            response = await client.post(
                f"{service_url}/api/chat",
                content=json.dumps(body),
            )
        # FastAPI should reject this as unsupported media / validation error.
        assert response.status_code in [415, 422]

    @pytest.mark.asyncio
    async def test_oversized_request(self, service_url: str):
        """A very large (100 KB) message is processed or rejected gracefully."""
        body = {
            "session_id": "test_session",
            "messages": [{"role": "user", "content": "x" * 100000}],
        }

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{service_url}/api/chat",
                json=body,
                headers={"Content-Type": "application/json"},
            )
        # Accepted, payload-too-large, or validation error are all acceptable.
        assert response.status_code in [200, 413, 422]
|
||||
|
||||
|
||||
class TestCORSAndHeaders:
    """CORS preflight handling and basic response headers."""

    @pytest.mark.asyncio
    async def test_cors_headers(self, service_url: str):
        """Preflight is answered and actual responses carry CORS headers."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            preflight = await client.options(
                f"{service_url}/api/chat",
                headers={
                    "Origin": "http://localhost:3000",
                    "Access-Control-Request-Method": "POST",
                    "Access-Control-Request-Headers": "Content-Type",
                },
            )
            # The preflight must be handled, not 404/405'd.
            assert preflight.status_code in [200, 204]

            # Follow up with a real cross-origin request.
            payload = {
                "session_id": "cors_test",
                "messages": [{"role": "user", "content": "test"}],
            }
            actual = await client.post(
                f"{service_url}/api/chat",
                json=payload,
                headers={
                    "Content-Type": "application/json",
                    "Origin": "http://localhost:3000",
                },
            )

            assert actual.status_code == 200
            # The response must echo back an allow-origin header.
            assert "access-control-allow-origin" in actual.headers

    @pytest.mark.asyncio
    async def test_security_headers(self, service_url: str):
        """Responses include the standard HTTP framing headers."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(f"{service_url}/health")
        assert response.status_code == 200

        # Exact security headers depend on deployment configuration; only
        # require the framing headers FastAPI always sets.
        headers = response.headers
        assert "content-length" in headers or "transfer-encoding" in headers
|
||||
|
||||
|
||||
class TestErrorHandling:
    """Error handling for unknown routes, wrong methods, and timeouts."""

    @pytest.mark.asyncio
    async def test_nonexistent_endpoint(self, service_url: str):
        """Unknown paths return 404."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(f"{service_url}/nonexistent")
        assert response.status_code == 404

    @pytest.mark.asyncio
    async def test_method_not_allowed(self, service_url: str):
        """GET on the POST-only chat endpoint returns 405."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(f"{service_url}/api/chat")  # GET instead of POST
        assert response.status_code == 405

    @pytest.mark.asyncio
    async def test_timeout_handling(self, service_url: str):
        """Client-side timeouts surface as httpx.TimeoutException (or the call succeeds)."""
        # A 1 ms timeout: either the request times out, or the service is
        # fast enough that it completes normally -- both are acceptable.
        async with httpx.AsyncClient(timeout=0.001) as impatient_client:
            try:
                response = await impatient_client.get(f"{service_url}/health")
            except httpx.TimeoutException:
                pass  # Expected timeout - this is fine
            else:
                assert response.status_code == 200
|
||||
|
||||
|
||||
class TestServiceIntegration:
    """Integration checks that exercise real service features."""

    @staticmethod
    async def _read_some(response: httpx.Response, min_chars: int) -> str:
        # Drain the streamed body until at least `min_chars` characters
        # arrive (or the stream ends).
        collected = ""
        async for chunk in response.aiter_text():
            collected += chunk
            if len(collected) > min_chars:
                break
        return collected

    @pytest.mark.asyncio
    async def test_manufacturing_standards_query(self, service_url: str):
        """A standards question yields streamed content from the AI SDK endpoint."""
        payload = {
            "session_id": f"standards_test_{int(time.time())}",
            "messages": [
                {
                    "role": "user",
                    "content": "What are the key safety requirements in ISO 26262?",
                }
            ],
        }

        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{service_url}/api/ai-sdk/chat",
                json=payload,
                headers={"Content-Type": "application/json"},
            )

            assert response.status_code == 200

            # Read just enough of the stream to confirm it produces output.
            content = await self._read_some(response, 100)

        assert len(content) > 0

    @pytest.mark.asyncio
    async def test_general_conversation(self, service_url: str):
        """A general greeting produces a streaming chat response."""
        payload = {
            "session_id": f"general_test_{int(time.time())}",
            "messages": [
                {
                    "role": "user",
                    "content": "Hello! How can you help me today?",
                }
            ],
        }

        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{service_url}/api/chat",
                json=payload,
                headers={"Content-Type": "application/json"},
            )

            assert response.status_code == 200

            # Collect a handful of chunks to verify streaming works.
            pieces = []
            async for chunk in response.aiter_text():
                pieces.append(chunk)
                if len(pieces) > 10:
                    break
            content = "".join(pieces)

        assert len(content) > 0
|
||||
415
vw-agentic-rag/tests/integration/test_e2e_tool_ui.py
Normal file
415
vw-agentic-rag/tests/integration/test_e2e_tool_ui.py
Normal file
@@ -0,0 +1,415 @@
|
||||
"""
|
||||
End-to-End Integration Tests for Tool UI
|
||||
|
||||
These tests validate the complete user experience by connecting to a running service.
|
||||
They test tool calling, response formatting, and user interface integration.
|
||||
"""
|
||||
import pytest
|
||||
import asyncio
|
||||
import httpx
|
||||
import time
|
||||
import os
|
||||
|
||||
|
||||
# Remote service connection settings; override the default target via the
# AGENTIC_RAG_SERVICE_URL environment variable.
DEFAULT_SERVICE_URL = "http://127.0.0.1:8000"
SERVICE_URL = os.environ.get("AGENTIC_RAG_SERVICE_URL", DEFAULT_SERVICE_URL)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def service_url() -> str:
    """Session-scoped fixture returning the base URL of the service under test."""
    return SERVICE_URL
|
||||
|
||||
|
||||
class TestEndToEndWorkflows:
    """Complete end-to-end user workflows against a running service."""

    @staticmethod
    def _request(session_id: str, content: str) -> dict:
        # Single-turn chat payload.
        return {
            "session_id": session_id,
            "messages": [{"role": "user", "content": content}],
        }

    @staticmethod
    async def _post_and_read(endpoint: str, payload: dict, limit: int) -> str:
        # POST the payload, require a 200, then drain the stream until
        # more than `limit` characters have arrived (or it ends).
        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(
                endpoint,
                json=payload,
                headers={"Content-Type": "application/json"},
            )
            assert response.status_code == 200

            text = ""
            async for chunk in response.aiter_text():
                text += chunk
                if len(text) > limit:
                    break
        return text

    @pytest.mark.asyncio
    async def test_standards_research_with_tools(self, service_url: str):
        """Standards research via /api/chat returns substantial content."""
        session_id = f"e2e_standards_{int(time.time())}"
        payload = self._request(
            session_id,
            "What are the safety requirements for automotive braking systems according to ISO 26262?",
        )

        full_content = await self._post_and_read(f"{service_url}/api/chat", payload, 1000)

        assert len(full_content) > 100
        print(f"Standards research response length: {len(full_content)} chars")

    @pytest.mark.asyncio
    async def test_manufacturing_compliance_workflow(self, service_url: str):
        """Compliance workflow via the AI SDK endpoint yields content."""
        session_id = f"e2e_compliance_{int(time.time())}"
        payload = self._request(
            session_id,
            "I need to understand compliance requirements for manufacturing equipment safety. What standards apply?",
        )

        content = await self._post_and_read(f"{service_url}/api/ai-sdk/chat", payload, 500)

        assert len(content) > 50
        print(f"Compliance workflow response length: {len(content)} chars")

    @pytest.mark.asyncio
    async def test_technical_documentation_workflow(self, service_url: str):
        """Technical documentation research via /api/chat yields content."""
        session_id = f"e2e_technical_{int(time.time())}"
        payload = self._request(
            session_id,
            "How do I implement functional safety according to IEC 61508 for industrial control systems?",
        )

        content = await self._post_and_read(f"{service_url}/api/chat", payload, 800)

        assert len(content) > 100
        print(f"Technical documentation response length: {len(content)} chars")
|
||||
|
||||
|
||||
class TestMultiTurnConversations:
    """Multi-turn conversation workflows sharing a single session."""

    @pytest.mark.asyncio
    async def test_progressive_standards_exploration(self, service_url: str):
        """Four increasingly specific questions in one session all stream content."""
        session_id = f"e2e_progressive_{int(time.time())}"

        questions = [
            "What is ISO 26262?",
            "What are the ASIL levels?",
            "How do I determine ASIL D requirements?",
            "What testing is required for ASIL D systems?",
        ]

        async with httpx.AsyncClient(timeout=90.0) as client:
            for turn, question in enumerate(questions):
                payload = {
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": question}],
                }

                response = await client.post(
                    f"{service_url}/api/chat",
                    json=payload,
                    headers={"Content-Type": "application/json"},
                )
                assert response.status_code == 200

                # Drain just enough of the stream to confirm output.
                text = ""
                async for chunk in response.aiter_text():
                    text += chunk
                    if len(text) > 300:
                        break

                assert len(text) > 30
                print(f"Turn {turn + 1}: {len(text)} chars")

                # Brief pause between turns.
                await asyncio.sleep(1)

    @pytest.mark.asyncio
    async def test_comparative_analysis_workflow(self, service_url: str):
        """Comparative questions across standards stream content via the AI SDK."""
        session_id = f"e2e_comparative_{int(time.time())}"

        questions = [
            "What are the differences between ISO 26262 and IEC 61508?",
            "Which standard is more appropriate for automotive applications?",
            "How do the safety integrity levels compare between these standards?",
        ]

        async with httpx.AsyncClient(timeout=90.0) as client:
            for question in questions:
                payload = {
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": question}],
                }

                response = await client.post(
                    f"{service_url}/api/ai-sdk/chat",
                    json=payload,
                    headers={"Content-Type": "application/json"},
                )
                assert response.status_code == 200

                text = ""
                async for chunk in response.aiter_text():
                    text += chunk
                    if len(text) > 400:
                        break

                assert len(text) > 50
                await asyncio.sleep(1.5)
|
||||
|
||||
|
||||
class TestSpecializedQueries:
    """Specialized query types and edge cases."""

    @staticmethod
    async def _ask(endpoint: str, session_id: str, question: str, limit: int) -> str:
        # Send one question, require a 200, and drain the stream until more
        # than `limit` characters have arrived (or it ends).
        payload = {
            "session_id": session_id,
            "messages": [{"role": "user", "content": question}],
        }

        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(
                endpoint,
                json=payload,
                headers={"Content-Type": "application/json"},
            )
            assert response.status_code == 200

            text = ""
            async for chunk in response.aiter_text():
                text += chunk
                if len(text) > limit:
                    break
        return text

    @pytest.mark.asyncio
    async def test_specific_standard_section_query(self, service_url: str):
        """A question about one section of a standard streams content."""
        content = await self._ask(
            f"{service_url}/api/chat",
            f"e2e_specific_{int(time.time())}",
            "What does section 4.3 of ISO 26262-3 say about software architectural design?",
            600,
        )
        assert len(content) > 50

    @pytest.mark.asyncio
    async def test_implementation_guidance_query(self, service_url: str):
        """A how-to implementation question streams content via the AI SDK."""
        content = await self._ask(
            f"{service_url}/api/ai-sdk/chat",
            f"e2e_implementation_{int(time.time())}",
            "How should I implement a safety management system according to ISO 45001?",
            500,
        )
        assert len(content) > 100

    @pytest.mark.asyncio
    async def test_cross_domain_standards_query(self, service_url: str):
        """A cross-domain (security vs. safety) question streams content."""
        content = await self._ask(
            f"{service_url}/api/chat",
            f"e2e_cross_domain_{int(time.time())}",
            "How do cybersecurity standards like ISO 27001 relate to functional safety standards like ISO 26262?",
            700,
        )
        assert len(content) > 100
|
||||
|
||||
|
||||
class TestUserExperience:
    """Overall user-experience aspects: response quality and error recovery."""

    @pytest.mark.asyncio
    async def test_response_quality_indicators(self, service_url: str):
        """Responses are long and word-rich enough to suggest good structure."""
        payload = {
            "session_id": f"e2e_quality_{int(time.time())}",
            "messages": [
                {
                    "role": "user",
                    "content": "What are the key principles of risk assessment in ISO 31000?",
                }
            ],
        }

        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(
                f"{service_url}/api/chat",
                json=payload,
                headers={"Content-Type": "application/json"},
            )
            assert response.status_code == 200

            # Collect a substantial prefix of the stream for analysis.
            full_content = ""
            async for chunk in response.aiter_text():
                full_content += chunk
                if len(full_content) > 1200:
                    break

        # Basic quality heuristics: non-trivial length and word count.
        assert len(full_content) > 100
        assert len(full_content.split()) > 20  # At least 20 words

        print(f"Quality response length: {len(full_content)} chars")

    @pytest.mark.asyncio
    async def test_error_recovery_experience(self, service_url: str):
        """A bad request mid-session does not break subsequent good turns."""
        session_id = f"e2e_error_recovery_{int(time.time())}"
        json_headers = {"Content-Type": "application/json"}

        async with httpx.AsyncClient(timeout=90.0) as client:
            # Start with a good question.
            good_request = {
                "session_id": session_id,
                "messages": [{"role": "user", "content": "What is ISO 9001?"}],
            }
            response = await client.post(
                f"{service_url}/api/chat", json=good_request, headers=json_headers
            )
            assert response.status_code == 200

            await asyncio.sleep(1)

            # A potentially problematic request: empty message content.
            try:
                problematic_request = {
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": ""}],
                }
                await client.post(
                    f"{service_url}/api/chat", json=problematic_request, headers=json_headers
                )
            except Exception:
                pass  # Expected to potentially fail

            await asyncio.sleep(1)

            # Recovery with another good question.
            recovery_request = {
                "session_id": session_id,
                "messages": [
                    {"role": "user", "content": "Can you help me understand quality management?"}
                ],
            }
            recovery_response = await client.post(
                f"{service_url}/api/chat", json=recovery_request, headers=json_headers
            )

            # Should recover successfully.
            assert recovery_response.status_code == 200

            text = ""
            async for chunk in recovery_response.aiter_text():
                text += chunk
                if len(text) > 200:
                    break

            assert len(text) > 30
            print("📤 Sending to backend...")
|
||||
402
vw-agentic-rag/tests/integration/test_full_workflow.py
Normal file
402
vw-agentic-rag/tests/integration/test_full_workflow.py
Normal file
@@ -0,0 +1,402 @@
|
||||
"""
|
||||
Full Workflow Integration Tests
|
||||
|
||||
These tests validate complete end-to-end workflows by connecting to a running service.
|
||||
They test realistic user scenarios and complex interactions.
|
||||
"""
|
||||
import pytest
|
||||
import asyncio
|
||||
import httpx
|
||||
import time
|
||||
import os
|
||||
from typing import List, Dict, Any
|
||||
|
||||
|
||||
# Configuration for remote service connection
|
||||
DEFAULT_SERVICE_URL = "http://127.0.0.1:8000"
|
||||
SERVICE_URL = os.getenv("AGENTIC_RAG_SERVICE_URL", DEFAULT_SERVICE_URL)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def service_url() -> str:
    """Get the service URL for testing.

    Resolved once per test session from the module-level SERVICE_URL,
    which reads the AGENTIC_RAG_SERVICE_URL environment variable and
    falls back to the local default.
    """
    return SERVICE_URL
|
||||
|
||||
|
||||
class TestCompleteWorkflows:
    """Exercise complete multi-turn user workflows against a running service."""

    @staticmethod
    def _turn(session_id: str, text: str) -> dict:
        # One user message in the chat request schema.
        return {
            "session_id": session_id,
            "messages": [{"role": "user", "content": text}],
        }

    @staticmethod
    async def _drain(response, threshold: int) -> str:
        # Accumulate streamed text until more than `threshold` chars arrive.
        collected = ""
        async for piece in response.aiter_text():
            collected += piece
            if len(collected) > threshold:
                break
        return collected

    @pytest.mark.asyncio
    async def test_standards_research_workflow(self, service_url: str):
        """Walk a user's ISO 26262 research conversation turn by turn."""
        session_id = f"standards_workflow_{int(time.time())}"

        questions = [
            "What is ISO 26262 and what does it cover?",
            "What are the ASIL levels in ISO 26262?",
            "Can you explain ASIL D requirements in detail?",
            "How does ISO 26262 relate to vehicle cybersecurity?",
        ]

        async with httpx.AsyncClient(timeout=60.0) as client:
            for turn_index, question in enumerate(questions):
                reply = await client.post(
                    f"{service_url}/api/ai-sdk/chat",
                    json=self._turn(session_id, question),
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                body = await self._drain(reply, 200)
                assert len(body) > 50
                print(f"Question {turn_index+1} response length: {len(body)} chars")

                # Brief pause between turns, as a real user would take.
                await asyncio.sleep(0.5)

    @pytest.mark.asyncio
    async def test_manufacturing_safety_workflow(self, service_url: str):
        """Run a manufacturing-safety Q&A and check every turn answers."""
        session_id = f"manufacturing_workflow_{int(time.time())}"

        questions = [
            "What are the key safety standards for manufacturing equipment?",
            "How do ISO 13849 and IEC 62061 compare?",
            "What is the process for safety risk assessment in manufacturing?",
        ]

        async with httpx.AsyncClient(timeout=60.0) as client:
            answers = []
            for question in questions:
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json=self._turn(session_id, question),
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                answers.append(await self._drain(reply, 300))
                await asyncio.sleep(0.5)

            # Every question must have produced a non-trivial answer.
            assert len(answers) == len(questions)
            for answer in answers:
                assert len(answer) > 30

    @pytest.mark.asyncio
    async def test_session_context_continuity(self, service_url: str):
        """Verify follow-up questions in one session still get answered."""
        session_id = f"context_test_{int(time.time())}"

        async with httpx.AsyncClient(timeout=60.0) as client:
            # First turn establishes a concrete engineering scenario.
            first = await client.post(
                f"{service_url}/api/chat",
                json=self._turn(
                    session_id,
                    "I'm working on a safety system for automotive braking. What standard should I follow?",
                ),
                headers={"Content-Type": "application/json"},
            )
            assert first.status_code == 200

            # Give the service time to finish processing the first turn.
            await asyncio.sleep(2)

            # Follow-up that only makes sense with the prior context.
            second = await client.post(
                f"{service_url}/api/chat",
                json=self._turn(session_id, "What are the specific testing requirements for this standard?"),
                headers={"Content-Type": "application/json"},
            )
            assert second.status_code == 200

            # Both answers must carry meaningful content.
            first_body = await self._drain(first, 100)
            second_body = await self._drain(second, 100)

            assert len(first_body) > 50
            assert len(second_body) > 50
|
||||
|
||||
|
||||
class TestErrorRecoveryWorkflows:
    """Error recovery and edge-case workflows."""

    @pytest.mark.asyncio
    async def test_session_recovery_after_error(self, service_url: str):
        """A session must keep working after a problematic request."""
        session_id = f"error_recovery_{int(time.time())}"
        headers = {"Content-Type": "application/json"}

        async with httpx.AsyncClient(timeout=60.0) as client:
            # Known-good request first.
            ok = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": "What is ISO 9001?"}],
                },
                headers=headers,
            )
            assert ok.status_code == 200

            # An empty message may be rejected or raise; either is acceptable.
            try:
                await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": ""}],
                    },
                    headers=headers,
                )
            except Exception:
                pass  # Expected to potentially fail

            await asyncio.sleep(1)

            # The session itself must still answer afterwards.
            recovered = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": "Can you summarize what we discussed?"}],
                },
                headers=headers,
            )
            assert recovered.status_code == 200

    @pytest.mark.asyncio
    async def test_concurrent_sessions(self, service_url: str):
        """Three independent sessions may run at the same time."""
        stamp = int(time.time())
        session_ids = [f"concurrent_{stamp}_{i}" for i in range(3)]
        questions = [
            "What is ISO 27001?",
            "What is NIST Cybersecurity Framework?",
            "What is GDPR compliance?",
        ]

        async def run_one(session_id: str, question: str):
            # Each session gets its own client, mirroring separate users.
            async with httpx.AsyncClient(timeout=60.0) as client:
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": question}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200
                return session_id

        outcomes = await asyncio.gather(
            *(run_one(sid, q) for sid, q in zip(session_ids, questions)),
            return_exceptions=True,
        )

        # All sessions should complete without raising.
        assert len(outcomes) == 3
        for outcome in outcomes:
            assert not isinstance(outcome, Exception)
|
||||
|
||||
|
||||
class TestPerformanceWorkflows:
    """Performance-oriented workflows."""

    @pytest.mark.asyncio
    async def test_rapid_fire_requests(self, service_url: str):
        """Back-to-back requests in one session must all succeed."""
        session_id = f"rapid_fire_{int(time.time())}"
        prompts = ["Hello", "What is ISO 14001?", "Thank you", "Goodbye"]

        async with httpx.AsyncClient(timeout=60.0) as client:
            for index, prompt in enumerate(prompts):
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": prompt}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200
                print(f"Rapid request {index+1} completed")

                # Deliberately minimal pause to stress the service.
                await asyncio.sleep(0.1)

    @pytest.mark.asyncio
    async def test_large_context_workflow(self, service_url: str):
        """Context built up over many turns must keep being accepted."""
        session_id = f"large_context_{int(time.time())}"

        turns = [
            "I need to understand automotive safety standards",
            "Specifically, tell me about ISO 26262 functional safety",
            "What are the different ASIL levels and their requirements?",
            "How do I implement ASIL D for a braking system?",
            "What testing and validation is required for ASIL D?",
            "Can you provide a summary of everything we've discussed?",
        ]

        async with httpx.AsyncClient(timeout=60.0) as client:
            for index, message in enumerate(turns):
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": message}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200
                print(f"Context turn {index+1} completed")

                # Allow the service to fold this turn into the session context.
                await asyncio.sleep(1)
|
||||
|
||||
|
||||
class TestRealWorldScenarios:
    """Realistic user personas and their question flows."""

    @pytest.mark.asyncio
    async def test_compliance_officer_scenario(self, service_url: str):
        """Mimic a compliance officer working through regulatory questions."""
        session_id = f"compliance_officer_{int(time.time())}"

        questions = [
            "I need to ensure our new product meets regulatory requirements. What standards apply to automotive safety systems?",
            "Our system is classified as ASIL C. What does this mean for our development process?",
            "What documentation do we need to prepare for safety assessment?",
            "How often do we need to review and update our safety processes?",
        ]

        async with httpx.AsyncClient(timeout=90.0) as client:
            for step, question in enumerate(questions):
                reply = await client.post(
                    f"{service_url}/api/ai-sdk/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": question}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                # Realistic think-time between questions.
                await asyncio.sleep(2)
                print(f"Compliance scenario step {step+1} completed")

    @pytest.mark.asyncio
    async def test_engineer_research_scenario(self, service_url: str):
        """Mimic an engineer drilling into technical standard details."""
        session_id = f"engineer_research_{int(time.time())}"

        questions = [
            "I'm designing a safety-critical system. What's the difference between ISO 26262 and IEC 61508?",
            "For automotive applications, which standard takes precedence?",
            "What are the specific requirements for software development under ISO 26262?",
            "Can you explain the V-model development process required by the standard?",
        ]

        async with httpx.AsyncClient(timeout=90.0) as client:
            for question in questions:
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": question}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                # Pull enough of the answer to confirm real content.
                body = ""
                async for piece in reply.aiter_text():
                    body += piece
                    if len(body) > 150:
                        break

                assert len(body) > 50
                await asyncio.sleep(1.5)
|
||||
406
vw-agentic-rag/tests/integration/test_streaming_integration.py
Normal file
406
vw-agentic-rag/tests/integration/test_streaming_integration.py
Normal file
@@ -0,0 +1,406 @@
|
||||
"""
|
||||
Streaming Integration Tests
|
||||
|
||||
These tests validate streaming behavior by connecting to a running service.
|
||||
They focus on real-time response patterns and streaming event handling.
|
||||
"""
|
||||
import pytest
|
||||
import asyncio
|
||||
import httpx
|
||||
import time
|
||||
import os
|
||||
|
||||
|
||||
# Configuration for remote service connection
|
||||
DEFAULT_SERVICE_URL = "http://127.0.0.1:8000"
|
||||
SERVICE_URL = os.getenv("AGENTIC_RAG_SERVICE_URL", DEFAULT_SERVICE_URL)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def service_url() -> str:
    """Get the service URL for testing.

    Resolved once per test session from the module-level SERVICE_URL,
    which reads the AGENTIC_RAG_SERVICE_URL environment variable and
    falls back to the local default.
    """
    return SERVICE_URL
|
||||
|
||||
|
||||
class TestStreamingBehavior:
    """Streaming response behavior of the chat endpoints.

    NOTE(review): `client.post` in httpx reads the whole body before
    returning, so iterating `aiter_text()` here chunks the buffered body
    rather than observing network timing; `client.stream(...)` would test
    true incremental delivery — confirm which behavior is intended.
    """

    @pytest.mark.asyncio
    async def test_basic_streaming_response(self, service_url: str):
        """Responses should arrive as multiple chunks, not one empty blob."""
        session_id = f"streaming_test_{int(time.time())}"

        async with httpx.AsyncClient(timeout=60.0) as client:
            reply = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": "What is ISO 26262?"}],
                },
                headers={"Content-Type": "application/json"},
            )
            assert reply.status_code == 200

            # Collect a bounded number of chunks.
            pieces = []
            async for piece in reply.aiter_text():
                pieces.append(piece)
                if len(pieces) > 10:
                    break

            # Multiple chunks with non-empty combined content.
            assert len(pieces) > 1
            assert len("".join(pieces)) > 0

    @pytest.mark.asyncio
    async def test_ai_sdk_streaming_format(self, service_url: str):
        """AI-SDK endpoint streams plain text in multiple chunks."""
        session_id = f"ai_sdk_streaming_{int(time.time())}"

        async with httpx.AsyncClient(timeout=60.0) as client:
            reply = await client.post(
                f"{service_url}/api/ai-sdk/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": "Explain vehicle safety testing"}],
                },
                headers={"Content-Type": "application/json"},
            )
            assert reply.status_code == 200
            assert "text/plain" in reply.headers.get("content-type", "")

            pieces = 0
            size = 0
            async for piece in reply.aiter_text():
                pieces += 1
                size += len(piece)
                if pieces > 15:
                    break

            # Multiple chunks carrying meaningful content.
            assert pieces > 1
            assert size > 50

    @pytest.mark.asyncio
    async def test_streaming_performance(self, service_url: str):
        """First chunk should arrive within a reasonable time budget."""
        session_id = f"streaming_perf_{int(time.time())}"

        async with httpx.AsyncClient(timeout=60.0) as client:
            started = time.time()

            reply = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": "What are automotive safety standards?"}],
                },
                headers={"Content-Type": "application/json"},
            )
            assert reply.status_code == 200

            first_seen = None
            seen = 0
            async for _ in reply.aiter_text():
                if first_seen is None:
                    first_seen = time.time()
                seen += 1
                if seen > 5:
                    break

            # Budget: under 10 seconds to the first chunk.
            if first_seen:
                assert first_seen - started < 10.0

    @pytest.mark.asyncio
    async def test_streaming_interruption_handling(self, service_url: str):
        """Stopping mid-stream must not prevent receiving early chunks."""
        session_id = f"streaming_interrupt_{int(time.time())}"

        async with httpx.AsyncClient(timeout=60.0) as client:
            reply = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": "Tell me about ISO standards"}],
                },
                headers={"Content-Type": "application/json"},
            )
            assert reply.status_code == 200

            received = 0
            async for _ in reply.aiter_text():
                received += 1
                if received >= 3:
                    break  # Abandon the stream on purpose.

            assert received > 0
|
||||
|
||||
|
||||
class TestConcurrentStreaming:
    """Concurrent streaming scenarios."""

    @pytest.mark.asyncio
    async def test_multiple_concurrent_streams(self, service_url: str):
        """Several streams in parallel must each deliver chunks."""
        stamp = int(time.time())

        async def one_stream(suffix: str, question: str):
            # Independent client per stream, like separate browser tabs.
            session_id = f"concurrent_stream_{stamp}_{suffix}"

            async with httpx.AsyncClient(timeout=60.0) as client:
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": question}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                received = 0
                async for _ in reply.aiter_text():
                    received += 1
                    if received > 5:
                        break
                return received

        questions = [
            "What is ISO 26262?",
            "Explain NIST framework",
            "What is GDPR?",
        ]

        outcomes = await asyncio.gather(
            *(one_stream(f"session_{i}", q) for i, q in enumerate(questions)),
            return_exceptions=True,
        )

        # All streams complete, none raise, each produces chunks.
        assert len(outcomes) == 3
        for outcome in outcomes:
            assert not isinstance(outcome, Exception)
            assert outcome > 0

    @pytest.mark.asyncio
    async def test_same_session_rapid_requests(self, service_url: str):
        """Rapid requests in one session all stream some content."""
        session_id = f"rapid_session_{int(time.time())}"
        prompts = ["Hello", "What is ISO 9001?", "Thank you"]

        async with httpx.AsyncClient(timeout=60.0) as client:
            for index, prompt in enumerate(prompts):
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": prompt}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                received = 0
                async for _ in reply.aiter_text():
                    received += 1
                    if received > 3:
                        break

                print(f"Request {index+1} completed with {received} chunks")

                # Very short delay before the next request.
                await asyncio.sleep(0.2)
|
||||
|
||||
|
||||
class TestStreamingErrorHandling:
    """Error handling during streaming."""

    @pytest.mark.asyncio
    async def test_streaming_with_invalid_session(self, service_url: str):
        """Odd session IDs must be handled (accepted or rejected) cleanly."""
        odd_session_ids = [
            "",                       # empty
            "a" * 1000,               # very long
            "session with spaces",    # whitespace
            "session/with/slashes",   # path-like special characters
        ]

        async with httpx.AsyncClient(timeout=60.0) as client:
            for session_id in odd_session_ids:
                try:
                    reply = await client.post(
                        f"{service_url}/api/chat",
                        json={
                            "session_id": session_id,
                            "messages": [{"role": "user", "content": "Hello"}],
                        },
                        headers={"Content-Type": "application/json"},
                    )
                    # Either accepted or rejected with a validation error.
                    assert reply.status_code in [200, 422]
                except Exception as e:
                    # Some edge cases may raise; record and continue.
                    print(f"Session ID '{session_id}' caused exception: {e}")

    @pytest.mark.asyncio
    async def test_streaming_with_large_messages(self, service_url: str):
        """Large (~3KB) prompts either stream or are rejected cleanly."""
        session_id = f"large_msg_stream_{int(time.time())}"
        big_prompt = "Please explain safety standards. " * 100

        async with httpx.AsyncClient(timeout=90.0) as client:
            reply = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": big_prompt}],
                },
                headers={"Content-Type": "application/json"},
            )

            # Accepted, payload-too-large, or validation error are all OK.
            assert reply.status_code in [200, 413, 422]

            if reply.status_code == 200:
                # If accepted, the stream must actually produce chunks.
                received = 0
                async for _ in reply.aiter_text():
                    received += 1
                    if received > 5:
                        break

                assert received > 0
|
||||
|
||||
|
||||
class TestStreamingContentValidation:
    """Streaming content quality and format."""

    @pytest.mark.asyncio
    async def test_streaming_content_encoding(self, service_url: str):
        """Streamed content must survive a UTF-8 round trip."""
        session_id = f"encoding_test_{int(time.time())}"

        # Prompt deliberately contains non-ASCII characters.
        accented_prompt = "What is ISO 26262? Please explain with émphasis on safety ñorms."

        async with httpx.AsyncClient(timeout=60.0) as client:
            reply = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": accented_prompt}],
                },
                headers={"Content-Type": "application/json"},
            )
            assert reply.status_code == 200

            body = ""
            async for piece in reply.aiter_text():
                body += piece
                if len(body) > 100:
                    break

            assert isinstance(body, str)
            assert len(body) > 0

            # Round-trip through UTF-8 must be lossless.
            assert body.encode('utf-8').decode('utf-8') == body

    @pytest.mark.asyncio
    async def test_streaming_response_consistency(self, service_url: str):
        """The same question in fresh sessions always yields content."""
        prefix = f"consistency_test_{int(time.time())}"
        question = "What is ISO 26262?"
        bodies = []

        async with httpx.AsyncClient(timeout=60.0) as client:
            for attempt in range(3):
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": f"{prefix}_{attempt}",
                        "messages": [{"role": "user", "content": question}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                body = ""
                async for piece in reply.aiter_text():
                    body += piece
                    if len(body) > 200:
                        break

                bodies.append(body)
                await asyncio.sleep(0.5)

        # Every run must produce substantial content.
        for body in bodies:
            assert len(body) > 50

        # None of the runs may be effectively empty.
        assert len([b for b in bodies if b.strip()]) == len(bodies)
|
||||
Reference in New Issue
Block a user