init
This commit is contained in:
415
vw-agentic-rag/tests/integration/test_e2e_tool_ui.py
Normal file
415
vw-agentic-rag/tests/integration/test_e2e_tool_ui.py
Normal file
@@ -0,0 +1,415 @@
|
||||
"""
|
||||
End-to-End Integration Tests for Tool UI
|
||||
|
||||
These tests validate the complete user experience by connecting to a running service.
|
||||
They test tool calling, response formatting, and user interface integration.
|
||||
"""
|
||||
import pytest
|
||||
import asyncio
|
||||
import httpx
|
||||
import time
|
||||
import os
|
||||
|
||||
|
||||
# Configuration for remote service connection.
# DEFAULT_SERVICE_URL is the address used when AGENTIC_RAG_SERVICE_URL is
# unset, empty, or blank.
DEFAULT_SERVICE_URL = "http://127.0.0.1:8000"
# os.getenv() only falls back to its default when the variable is absent, so an
# exported-but-empty variable would otherwise produce an empty base URL.
# Trailing slashes are dropped so that f"{SERVICE_URL}/api/..." never yields a
# double slash in the request path.
SERVICE_URL = (
    os.getenv("AGENTIC_RAG_SERVICE_URL", "").strip().rstrip("/")
    or DEFAULT_SERVICE_URL
)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def service_url() -> str:
    """Return the base URL of the service that the tests talk to.

    The fixture is session-scoped and simply hands back the module-level
    ``SERVICE_URL`` value.
    """
    base_url: str = SERVICE_URL
    return base_url
|
||||
|
||||
|
||||
class TestEndToEndWorkflows:
    """Single-question workflows exercised against the chat endpoints.

    Every test posts one user message, requires an HTTP 200 reply, and then
    checks that at least a minimum amount of response text came back.
    """

    # Sent with every request in this class.
    _JSON_HEADERS = {"Content-Type": "application/json"}

    @staticmethod
    def _single_turn(session_id: str, question: str) -> dict:
        """Build a request body holding one user message for ``session_id``."""
        return {
            "session_id": session_id,
            "messages": [
                {
                    "role": "user",
                    "content": question,
                }
            ],
        }

    @staticmethod
    async def _read_prefix(response: httpx.Response, limit: int) -> str:
        """Join text chunks of ``response`` until more than ``limit`` chars are held.

        The length check runs after each chunk is appended, so the result may
        exceed ``limit``; if the chunks run out first, everything is returned.

        NOTE(review): callers obtain ``response`` from ``AsyncClient.post``
        rather than ``AsyncClient.stream``; presumably the body is already
        buffered at that point, so stopping early would not shorten the
        transfer - confirm against the httpx documentation.
        """
        pieces = []
        collected = 0
        async for piece in response.aiter_text():
            pieces.append(piece)
            collected += len(piece)
            if collected > limit:
                break
        return "".join(pieces)

    @pytest.mark.asyncio
    async def test_standards_research_with_tools(self, service_url: str):
        """Ask an ISO 26262 braking-safety question via ``/api/chat``.

        Passes when the reply is HTTP 200 and more than 100 characters of
        text are collected.
        """
        payload = self._single_turn(
            f"e2e_standards_{int(time.time())}",
            "What are the safety requirements for automotive braking systems according to ISO 26262?",
        )

        async with httpx.AsyncClient(timeout=90.0) as http:
            reply = await http.post(
                f"{service_url}/api/chat",
                json=payload,
                headers=self._JSON_HEADERS,
            )
            assert reply.status_code == 200

            # Stop collecting once more than 1000 characters are in hand.
            body = await self._read_prefix(reply, 1000)

            # The reply must carry a meaningful amount of text.
            assert len(body) > 100
            print(f"Standards research response length: {len(body)} chars")

    @pytest.mark.asyncio
    async def test_manufacturing_compliance_workflow(self, service_url: str):
        """Ask a manufacturing-compliance question via ``/api/ai-sdk/chat``.

        Passes when the reply is HTTP 200 and more than 50 characters of text
        are collected.
        """
        payload = self._single_turn(
            f"e2e_compliance_{int(time.time())}",
            "I need to understand compliance requirements for manufacturing equipment safety. What standards apply?",
        )

        async with httpx.AsyncClient(timeout=90.0) as http:
            reply = await http.post(
                f"{service_url}/api/ai-sdk/chat",
                json=payload,
                headers=self._JSON_HEADERS,
            )
            assert reply.status_code == 200

            # Reply from the AI SDK endpoint; collect just past 500 characters.
            body = await self._read_prefix(reply, 500)

            assert len(body) > 50
            print(f"Compliance workflow response length: {len(body)} chars")

    @pytest.mark.asyncio
    async def test_technical_documentation_workflow(self, service_url: str):
        """Ask an IEC 61508 implementation question via ``/api/chat``.

        Passes when the reply is HTTP 200 and more than 100 characters of
        text are collected.
        """
        payload = self._single_turn(
            f"e2e_technical_{int(time.time())}",
            "How do I implement functional safety according to IEC 61508 for industrial control systems?",
        )

        async with httpx.AsyncClient(timeout=90.0) as http:
            reply = await http.post(
                f"{service_url}/api/chat",
                json=payload,
                headers=self._JSON_HEADERS,
            )
            assert reply.status_code == 200

            body = await self._read_prefix(reply, 800)

            assert len(body) > 100
            print(f"Technical documentation response length: {len(body)} chars")
|
||||
|
||||
|
||||
class TestMultiTurnConversations:
    """Workflows that send several questions under one shared session id.

    Each request carries only the current question in ``messages``; the
    ``session_id`` is the only value repeated from turn to turn.
    """

    # Sent with every request in this class.
    _JSON_HEADERS = {"Content-Type": "application/json"}

    @staticmethod
    def _single_turn(session_id: str, question: str) -> dict:
        """Build a request body holding one user message for ``session_id``."""
        return {
            "session_id": session_id,
            "messages": [{"role": "user", "content": question}],
        }

    @staticmethod
    async def _read_prefix(response: httpx.Response, limit: int) -> str:
        """Join text chunks of ``response`` until more than ``limit`` chars are held.

        The length check runs after each chunk is appended, so the result may
        exceed ``limit``; if the chunks run out first, everything is returned.

        NOTE(review): callers obtain ``response`` from ``AsyncClient.post``
        rather than ``AsyncClient.stream``; presumably the body is already
        buffered at that point, so stopping early would not shorten the
        transfer - confirm against the httpx documentation.
        """
        pieces = []
        collected = 0
        async for piece in response.aiter_text():
            pieces.append(piece)
            collected += len(piece)
            if collected > limit:
                break
        return "".join(pieces)

    @pytest.mark.asyncio
    async def test_progressive_standards_exploration(self, service_url: str):
        """Walk through four ISO 26262 / ASIL questions via ``/api/chat``.

        Every turn must return HTTP 200 and more than 30 characters of text.
        The test sleeps one second after each turn.
        """
        session_id = f"e2e_progressive_{int(time.time())}"
        questions = [
            "What is ISO 26262?",
            "What are the ASIL levels?",
            "How do I determine ASIL D requirements?",
            "What testing is required for ASIL D systems?",
        ]

        async with httpx.AsyncClient(timeout=90.0) as http:
            for turn, question in enumerate(questions, start=1):
                reply = await http.post(
                    f"{service_url}/api/chat",
                    json=self._single_turn(session_id, question),
                    headers=self._JSON_HEADERS,
                )
                assert reply.status_code == 200

                text = await self._read_prefix(reply, 300)

                assert len(text) > 30
                print(f"Turn {turn}: {len(text)} chars")

                # Brief pause between turns.
                await asyncio.sleep(1)

    @pytest.mark.asyncio
    async def test_comparative_analysis_workflow(self, service_url: str):
        """Send three ISO 26262 vs IEC 61508 questions via ``/api/ai-sdk/chat``.

        Every turn must return HTTP 200 and more than 50 characters of text.
        The test sleeps 1.5 seconds after each turn.
        """
        session_id = f"e2e_comparative_{int(time.time())}"
        questions = [
            "What are the differences between ISO 26262 and IEC 61508?",
            "Which standard is more appropriate for automotive applications?",
            "How do the safety integrity levels compare between these standards?",
        ]

        async with httpx.AsyncClient(timeout=90.0) as http:
            for question in questions:
                reply = await http.post(
                    f"{service_url}/api/ai-sdk/chat",
                    json=self._single_turn(session_id, question),
                    headers=self._JSON_HEADERS,
                )
                assert reply.status_code == 200

                text = await self._read_prefix(reply, 400)

                assert len(text) > 50
                await asyncio.sleep(1.5)
|
||||
|
||||
|
||||
class TestSpecializedQueries:
    """Narrowly targeted single-question queries against the chat endpoints.

    Every test posts one user message, requires an HTTP 200 reply, and then
    checks that at least a minimum amount of response text came back.
    """

    # Sent with every request in this class.
    _JSON_HEADERS = {"Content-Type": "application/json"}

    @staticmethod
    def _single_turn(session_id: str, question: str) -> dict:
        """Build a request body holding one user message for ``session_id``."""
        return {
            "session_id": session_id,
            "messages": [
                {
                    "role": "user",
                    "content": question,
                }
            ],
        }

    @staticmethod
    async def _read_prefix(response: httpx.Response, limit: int) -> str:
        """Join text chunks of ``response`` until more than ``limit`` chars are held.

        The length check runs after each chunk is appended, so the result may
        exceed ``limit``; if the chunks run out first, everything is returned.

        NOTE(review): callers obtain ``response`` from ``AsyncClient.post``
        rather than ``AsyncClient.stream``; presumably the body is already
        buffered at that point, so stopping early would not shorten the
        transfer - confirm against the httpx documentation.
        """
        pieces = []
        collected = 0
        async for piece in response.aiter_text():
            pieces.append(piece)
            collected += len(piece)
            if collected > limit:
                break
        return "".join(pieces)

    @pytest.mark.asyncio
    async def test_specific_standard_section_query(self, service_url: str):
        """Ask about one named section of ISO 26262-3 via ``/api/chat``.

        Passes when the reply is HTTP 200 and more than 50 characters of text
        are collected.
        """
        payload = self._single_turn(
            f"e2e_specific_{int(time.time())}",
            "What does section 4.3 of ISO 26262-3 say about software architectural design?",
        )

        async with httpx.AsyncClient(timeout=90.0) as http:
            reply = await http.post(
                f"{service_url}/api/chat",
                json=payload,
                headers=self._JSON_HEADERS,
            )
            assert reply.status_code == 200

            body = await self._read_prefix(reply, 600)

            assert len(body) > 50

    @pytest.mark.asyncio
    async def test_implementation_guidance_query(self, service_url: str):
        """Ask for ISO 45001 implementation guidance via ``/api/ai-sdk/chat``.

        Passes when the reply is HTTP 200 and more than 100 characters of
        text are collected.
        """
        payload = self._single_turn(
            f"e2e_implementation_{int(time.time())}",
            "How should I implement a safety management system according to ISO 45001?",
        )

        async with httpx.AsyncClient(timeout=90.0) as http:
            reply = await http.post(
                f"{service_url}/api/ai-sdk/chat",
                json=payload,
                headers=self._JSON_HEADERS,
            )
            assert reply.status_code == 200

            body = await self._read_prefix(reply, 500)

            assert len(body) > 100

    @pytest.mark.asyncio
    async def test_cross_domain_standards_query(self, service_url: str):
        """Ask how ISO 27001 relates to ISO 26262 via ``/api/chat``.

        Passes when the reply is HTTP 200 and more than 100 characters of
        text are collected.
        """
        payload = self._single_turn(
            f"e2e_cross_domain_{int(time.time())}",
            "How do cybersecurity standards like ISO 27001 relate to functional safety standards like ISO 26262?",
        )

        async with httpx.AsyncClient(timeout=90.0) as http:
            reply = await http.post(
                f"{service_url}/api/chat",
                json=payload,
                headers=self._JSON_HEADERS,
            )
            assert reply.status_code == 200

            body = await self._read_prefix(reply, 700)

            assert len(body) > 100
|
||||
|
||||
|
||||
class TestUserExperience:
|
||||
"""Test overall user experience aspects"""
|
||||
|
||||
@pytest.mark.asyncio
async def test_response_quality_indicators(self, service_url: str):
    """Check basic size heuristics on the reply to an ISO 31000 question.

    Posts one user message to ``/api/chat``, requires HTTP 200, and then
    requires more than 100 characters and more than 20 whitespace-separated
    words in the collected text. These are coarse heuristics only; nothing
    about structure or citations is inspected.
    """
    payload = {
        "session_id": f"e2e_quality_{int(time.time())}",
        "messages": [
            {
                "role": "user",
                "content": "What are the key principles of risk assessment in ISO 31000?",
            }
        ],
    }

    async with httpx.AsyncClient(timeout=90.0) as http:
        reply = await http.post(
            f"{service_url}/api/chat",
            json=payload,
            headers={"Content-Type": "application/json"},
        )
        assert reply.status_code == 200

        # Gather text until more than 1200 characters are held; the check
        # runs after each chunk is appended, so the total may exceed that.
        pieces = []
        collected = 0
        async for piece in reply.aiter_text():
            pieces.append(piece)
            collected += len(piece)
            if collected > 1200:
                break
        body = "".join(pieces)

        # Basic quality checks: enough characters...
        assert len(body) > 100

        # ...and at least 20 words (a rough proxy for substantive content).
        assert len(body.split()) > 20

        print(f"Quality response length: {len(body)} chars")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_error_recovery_experience(self, service_url: str):
|
||||
"""Test user experience when recovering from errors"""
|
||||
session_id = f"e2e_error_recovery_{int(time.time())}"
|
||||
|
||||
async with httpx.AsyncClient(timeout=90.0) as client:
|
||||
# Start with a good question
|
||||
good_request = {
|
||||
"session_id": session_id,
|
||||
"messages": [{"role": "user", "content": "What is ISO 9001?"}]
|
||||
}
|
||||
|
||||
response = await client.post(
|
||||
f"{service_url}/api/chat",
|
||||
json=good_request,
|
||||
headers={"Content-Type": "application/json"}
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
await asyncio.sleep(1)
|
||||
|
||||
# Try a potentially problematic request
|
||||
try:
|
||||
problematic_request = {
|
||||
"session_id": session_id,
|
||||
"messages": [{"role": "user", "content": ""}] # Empty content
|
||||
}
|
||||
|
||||
await client.post(
|
||||
f"{service_url}/api/chat",
|
||||
json=problematic_request,
|
||||
headers={"Content-Type": "application/json"}
|
||||
)
|
||||
except Exception:
|
||||
pass # Expected to potentially fail
|
||||
|
||||
await asyncio.sleep(1)
|
||||
|
||||
# Recovery with another good question
|
||||
recovery_request = {
|
||||
"session_id": session_id,
|
||||
"messages": [{"role": "user", "content": "Can you help me understand quality management?"}]
|
||||
}
|
||||
|
||||
recovery_response = await client.post(
|
||||
f"{service_url}/api/chat",
|
||||
json=recovery_request,
|
||||
headers={"Content-Type": "application/json"}
|
||||
)
|
||||
|
||||
# Should recover successfully
|
||||
assert recovery_response.status_code == 200
|
||||
|
||||
content = ""
|
||||
async for chunk in recovery_response.aiter_text():
|
||||
content += chunk
|
||||
if len(content) > 200:
|
||||
break
|
||||
|
||||
assert len(content) > 30
|
||||
print("📤 Sending to backend...")
|
||||
Reference in New Issue
Block a user