init

2025-09-26 17:15:54 +08:00
commit db0e5965ec
211 changed files with 40437 additions and 0 deletions
--- a/vw-agentic-rag/tests/integration/test_e2e_tool_ui.py
+++ b/vw-agentic-rag/tests/integration/test_e2e_tool_ui.py
@@ -0,0 +1,415 @@
+"""
+End-to-End Integration Tests for Tool UI
+
+These tests validate the complete user experience by connecting to a running service.
+They test tool calling, response formatting, and user interface integration.
+"""
+import pytest
+import asyncio
+import httpx
+import time
+import os
+
+
+# Base URL of the running service the tests connect to. The
+# AGENTIC_RAG_SERVICE_URL environment variable, when set, takes precedence
+# over the local default.
+DEFAULT_SERVICE_URL = "http://127.0.0.1:8000"
+SERVICE_URL = os.environ.get("AGENTIC_RAG_SERVICE_URL", DEFAULT_SERVICE_URL)
+
+
+@pytest.fixture(scope="session")
+def service_url() -> str:
+    """Provide the base URL of the service under test (session-scoped)."""
+    base_url: str = SERVICE_URL
+    return base_url
+
+
+class TestEndToEndWorkflows:
+    """End-to-end checks for single-question user workflows.
+
+    Each test posts one user message to the running service and verifies
+    that the reply has status 200 and carries a minimum amount of text.
+    """
+
+    @staticmethod
+    async def _ask(url: str, session_id: str, question: str, enough_chars: int) -> str:
+        """POST one user message to *url* and return the collected reply text.
+
+        Asserts an HTTP 200 status, then gathers text chunks and stops
+        reading once more than *enough_chars* characters have been collected.
+        """
+        payload = {
+            "session_id": session_id,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": question,
+                }
+            ],
+        }
+        pieces = []
+        received = 0
+
+        async with httpx.AsyncClient(timeout=90.0) as client:
+            response = await client.post(
+                url,
+                json=payload,
+                headers={"Content-Type": "application/json"},
+            )
+
+            assert response.status_code == 200
+
+            async for piece in response.aiter_text():
+                pieces.append(piece)
+                received += len(piece)
+                if received > enough_chars:
+                    break
+
+        return "".join(pieces)
+
+    @pytest.mark.asyncio
+    async def test_standards_research_with_tools(self, service_url: str):
+        """Ask a standards research question on the chat endpoint."""
+        session_id = f"e2e_standards_{int(time.time())}"
+
+        reply = await self._ask(
+            f"{service_url}/api/chat",
+            session_id,
+            "What are the safety requirements for automotive braking systems according to ISO 26262?",
+            1000,
+        )
+
+        # The reply must carry a meaningful amount of content.
+        assert len(reply) > 100
+        print(f"Standards research response length: {len(reply)} chars")
+
+    @pytest.mark.asyncio
+    async def test_manufacturing_compliance_workflow(self, service_url: str):
+        """Ask a manufacturing compliance question on the AI SDK chat endpoint."""
+        session_id = f"e2e_compliance_{int(time.time())}"
+
+        reply = await self._ask(
+            f"{service_url}/api/ai-sdk/chat",
+            session_id,
+            "I need to understand compliance requirements for manufacturing equipment safety. What standards apply?",
+            500,
+        )
+
+        assert len(reply) > 50
+        print(f"Compliance workflow response length: {len(reply)} chars")
+
+    @pytest.mark.asyncio
+    async def test_technical_documentation_workflow(self, service_url: str):
+        """Ask a technical documentation question on the chat endpoint."""
+        session_id = f"e2e_technical_{int(time.time())}"
+
+        reply = await self._ask(
+            f"{service_url}/api/chat",
+            session_id,
+            "How do I implement functional safety according to IEC 61508 for industrial control systems?",
+            800,
+        )
+
+        assert len(reply) > 100
+        print(f"Technical documentation response length: {len(reply)} chars")
+
+
+class TestMultiTurnConversations:
+    """Multi-turn conversations that reuse one session id across requests."""
+
+    @staticmethod
+    async def _send_turn(
+        client: httpx.AsyncClient,
+        url: str,
+        session_id: str,
+        question: str,
+        enough_chars: int,
+    ) -> str:
+        """POST one conversation turn through *client* and return the reply text.
+
+        Asserts an HTTP 200 status, then gathers text chunks and stops
+        reading once more than *enough_chars* characters have been collected.
+        """
+        response = await client.post(
+            url,
+            json={
+                "session_id": session_id,
+                "messages": [{"role": "user", "content": question}],
+            },
+            headers={"Content-Type": "application/json"},
+        )
+
+        assert response.status_code == 200
+
+        pieces = []
+        received = 0
+        async for piece in response.aiter_text():
+            pieces.append(piece)
+            received += len(piece)
+            if received > enough_chars:
+                break
+        return "".join(pieces)
+
+    @pytest.mark.asyncio
+    async def test_progressive_standards_exploration(self, service_url: str):
+        """Walk through four progressively deeper questions in one session."""
+        session_id = f"e2e_progressive_{int(time.time())}"
+
+        conversation_steps = [
+            "What is ISO 26262?",
+            "What are the ASIL levels?",
+            "How do I determine ASIL D requirements?",
+            "What testing is required for ASIL D systems?",
+        ]
+
+        async with httpx.AsyncClient(timeout=90.0) as client:
+            for turn_number, question in enumerate(conversation_steps, start=1):
+                reply = await self._send_turn(
+                    client, f"{service_url}/api/chat", session_id, question, 300
+                )
+
+                assert len(reply) > 30
+                print(f"Turn {turn_number}: {len(reply)} chars")
+
+                # Short pause before the next turn.
+                await asyncio.sleep(1)
+
+    @pytest.mark.asyncio
+    async def test_comparative_analysis_workflow(self, service_url: str):
+        """Ask three comparison questions in one session on the AI SDK endpoint."""
+        session_id = f"e2e_comparative_{int(time.time())}"
+
+        comparison_questions = [
+            "What are the differences between ISO 26262 and IEC 61508?",
+            "Which standard is more appropriate for automotive applications?",
+            "How do the safety integrity levels compare between these standards?",
+        ]
+
+        async with httpx.AsyncClient(timeout=90.0) as client:
+            for question in comparison_questions:
+                reply = await self._send_turn(
+                    client, f"{service_url}/api/ai-sdk/chat", session_id, question, 400
+                )
+
+                assert len(reply) > 50
+                await asyncio.sleep(1.5)
+
+
+class TestSpecializedQueries:
+    """Specialized query types: section lookups, guidance, cross-domain questions."""
+
+    @staticmethod
+    async def _query(url: str, session_id: str, question: str, enough_chars: int) -> str:
+        """POST one user message to *url* and return the collected reply text.
+
+        Asserts an HTTP 200 status, then gathers text chunks and stops
+        reading once more than *enough_chars* characters have been collected.
+        """
+        body = {
+            "session_id": session_id,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": question,
+                }
+            ],
+        }
+        pieces = []
+        received = 0
+
+        async with httpx.AsyncClient(timeout=90.0) as client:
+            response = await client.post(
+                url,
+                json=body,
+                headers={"Content-Type": "application/json"},
+            )
+
+            assert response.status_code == 200
+
+            async for piece in response.aiter_text():
+                pieces.append(piece)
+                received += len(piece)
+                if received > enough_chars:
+                    break
+
+        return "".join(pieces)
+
+    @pytest.mark.asyncio
+    async def test_specific_standard_section_query(self, service_url: str):
+        """Ask about one specific section of a standard on the chat endpoint."""
+        session_id = f"e2e_specific_{int(time.time())}"
+
+        reply = await self._query(
+            f"{service_url}/api/chat",
+            session_id,
+            "What does section 4.3 of ISO 26262-3 say about software architectural design?",
+            600,
+        )
+
+        assert len(reply) > 50
+
+    @pytest.mark.asyncio
+    async def test_implementation_guidance_query(self, service_url: str):
+        """Ask for implementation guidance on the AI SDK chat endpoint."""
+        session_id = f"e2e_implementation_{int(time.time())}"
+
+        reply = await self._query(
+            f"{service_url}/api/ai-sdk/chat",
+            session_id,
+            "How should I implement a safety management system according to ISO 45001?",
+            500,
+        )
+
+        assert len(reply) > 100
+
+    @pytest.mark.asyncio
+    async def test_cross_domain_standards_query(self, service_url: str):
+        """Ask a question that spans two standards domains on the chat endpoint."""
+        session_id = f"e2e_cross_domain_{int(time.time())}"
+
+        reply = await self._query(
+            f"{service_url}/api/chat",
+            session_id,
+            "How do cybersecurity standards like ISO 27001 relate to functional safety standards like ISO 26262?",
+            700,
+        )
+
+        assert len(reply) > 100
+
+
+class TestUserExperience:
+    """Checks on overall user experience: reply size and recovery after a bad request."""
+
+    @pytest.mark.asyncio
+    async def test_response_quality_indicators(self, service_url: str):
+        """Check that a reply is long enough to count as a substantive answer.
+
+        The heuristics are deliberately basic: more than 100 characters and
+        more than 20 whitespace-separated words.
+        """
+        session_id = f"e2e_quality_{int(time.time())}"
+
+        request_data = {
+            "session_id": session_id,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": "What are the key principles of risk assessment in ISO 31000?"
+                }
+            ]
+        }
+
+        async with httpx.AsyncClient(timeout=90.0) as client:
+            response = await client.post(
+                f"{service_url}/api/chat",
+                json=request_data,
+                headers={"Content-Type": "application/json"}
+            )
+
+            assert response.status_code == 200
+
+            # Collect reply text, stopping once enough has been gathered.
+            full_content = ""
+            async for chunk in response.aiter_text():
+                full_content += chunk
+                if len(full_content) > 1200:
+                    break
+
+            # Basic quality checks
+            assert len(full_content) > 100
+
+            # Basic heuristic for response quality: at least 20 words.
+            assert len(full_content.split()) > 20
+
+            print(f"Quality response length: {len(full_content)} chars")
+
+    @pytest.mark.asyncio
+    async def test_error_recovery_experience(self, service_url: str):
+        """Check that a session still answers after a potentially bad request.
+
+        Sequence: a valid question, then a message with empty content (whose
+        outcome is deliberately not asserted), then another valid question
+        that must return HTTP 200 and more than 30 characters of text.
+        """
+        session_id = f"e2e_error_recovery_{int(time.time())}"
+        chat_url = f"{service_url}/api/chat"
+        json_headers = {"Content-Type": "application/json"}
+
+        async with httpx.AsyncClient(timeout=90.0) as client:
+            # Start with a good question
+            good_request = {
+                "session_id": session_id,
+                "messages": [{"role": "user", "content": "What is ISO 9001?"}]
+            }
+
+            response = await client.post(
+                chat_url,
+                json=good_request,
+                headers=json_headers
+            )
+            assert response.status_code == 200
+
+            await asyncio.sleep(1)
+
+            # Try a potentially problematic request
+            problematic_request = {
+                "session_id": session_id,
+                "messages": [{"role": "user", "content": ""}]  # Empty content
+            }
+
+            try:
+                await client.post(
+                    chat_url,
+                    json=problematic_request,
+                    headers=json_headers
+                )
+            except httpx.HTTPError as exc:
+                # A transport or protocol failure is tolerated here because the
+                # test only cares about what happens afterwards. Anything
+                # else (e.g. a programming error) is no longer swallowed.
+                print(f"Problematic request failed (tolerated): {exc!r}")
+
+            await asyncio.sleep(1)
+
+            # Recovery with another good question
+            recovery_request = {
+                "session_id": session_id,
+                "messages": [{"role": "user", "content": "Can you help me understand quality management?"}]
+            }
+
+            recovery_response = await client.post(
+                chat_url,
+                json=recovery_request,
+                headers=json_headers
+            )
+
+            # Should recover successfully
+            assert recovery_response.status_code == 200
+
+            content = ""
+            async for chunk in recovery_response.aiter_text():
+                content += chunk
+                if len(content) > 200:
+                    break
+
+            assert len(content) > 30
+    # NOTE(review): this statement sits in the class body, so it runs once when
+    # the class is defined (at import/collection time), not during any test.
+    # It looks like a leftover; confirm before removing.
+    print("📤 Sending to backend...")