init
This commit is contained in:
1
vw-agentic-rag/tests/integration/__init__.py
Normal file
1
vw-agentic-rag/tests/integration/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Empty __init__.py files to make test packages
|
||||
170
vw-agentic-rag/tests/integration/test_2phase_retrieval.py
Normal file
170
vw-agentic-rag/tests/integration/test_2phase_retrieval.py
Normal file
@@ -0,0 +1,170 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test 2-phase retrieval strategy
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
import json
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
|
||||
# Logging: timestamped, level-tagged INFO messages for the smoke-test output.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
||||
|
||||
async def test_2phase_retrieval():
    """Exercise the agent's 2-phase retrieval strategy end to end.

    Streams a content-focused query to the locally running chat API and
    counts how often each retrieval tool is started:

    * Phase 1 -- ``retrieve_standard_regulation`` (metadata search)
    * Phase 2 -- ``retrieve_doc_chunk_standard_regulation`` (content search)

    The run is considered a pass when both phases fire at least once and at
    least two tools execute in total.  Results are reported via ``logger``;
    the coroutine returns ``None`` either way (this is a manual smoke test,
    not a pytest assertion suite).
    """
    session_id = f"2phase-test-{random.randint(1000000000, 9999999999)}"
    base_url = "http://127.0.0.1:8000"

    # Content-focused query (Chinese) expected to trigger both retrieval
    # phases: "How to test EV charging performance? Please describe the
    # test methods and steps in detail."
    query = "如何测试电动汽车的充电性能?请详细说明测试方法和步骤。"

    logger.info("🎯 2-PHASE RETRIEVAL TEST")
    logger.info("=" * 80)
    logger.info(f"📝 Session: {session_id}")
    logger.info(f"📝 Query: {query}")
    logger.info("-" * 60)

    payload = {
        "messages": [
            {
                "role": "user",
                "content": query,
            }
        ],
        "session_id": session_id,
    }

    # Tool-usage counters: phase 1 (metadata), phase 2 (content), and total.
    metadata_tools = 0
    content_tools = 0
    total_tools = 0

    timeout = httpx.Timeout(120.0)  # 2 minute timeout

    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            logger.info("✅ Streaming response started")

            async with client.stream(
                "POST",
                f"{base_url}/api/chat",
                json=payload,
                headers={"Content-Type": "application/json"},
            ) as response:
                # Bail out early on a non-success status.
                if response.status_code != 200:
                    error_body = await response.aread()
                    logger.error(f"❌ HTTP {response.status_code}: {error_body.decode()}")
                    return

                # SSE parsing state: the most recent "event:" line applies to
                # the next "data:" line only.
                current_event_type = None

                async for line in response.aiter_lines():
                    if not line.strip():
                        continue

                    if line.startswith("event: "):
                        current_event_type = line[len("event: "):]
                        continue

                    if not line.startswith("data: "):
                        continue
                    data_str = line[len("data: "):]

                    if data_str == "[DONE]":
                        logger.info("✅ Stream completed with [DONE]")
                        break

                    try:
                        event_data = json.loads(data_str)
                    except json.JSONDecodeError as e:
                        logger.warning(f"⚠️ Failed to parse event: {e}")
                        current_event_type = None
                        continue

                    event_type = current_event_type or "unknown"

                    if event_type == "tool_start":
                        total_tools += 1
                        tool_name = event_data.get("name", "unknown")
                        args = event_data.get("args", {})
                        # Look up the query argument once and truncate long
                        # values for readable log output (was three separate
                        # dict lookups).
                        raw_query = args.get("query", "")
                        query_arg = raw_query[:50] + "..." if len(raw_query) > 50 else raw_query

                        if tool_name == "retrieve_standard_regulation":
                            metadata_tools += 1
                            logger.info(f"📋 Phase 1 Tool {metadata_tools}: {tool_name}")
                            logger.info(f"   Query: {query_arg}")
                        elif tool_name == "retrieve_doc_chunk_standard_regulation":
                            content_tools += 1
                            logger.info(f"📄 Phase 2 Tool {content_tools}: {tool_name}")
                            logger.info(f"   Query: {query_arg}")
                        else:
                            logger.info(f"🔧 Tool {total_tools}: {tool_name}")

                    elif event_type == "tool_result":
                        tool_name = event_data.get("name", "unknown")
                        results_count = len(event_data.get("results", []))
                        took_ms = event_data.get("took_ms", 0)
                        logger.info(f"✅ Tool completed: {tool_name} ({results_count} results, {took_ms}ms)")

                    elif event_type == "tokens":
                        # Token deltas are too chatty to log individually.
                        pass

                    # Reset event type for the next event.
                    current_event_type = None

                    # BUGFIX: was `total_tools > 20`, which only broke after
                    # the 21st tool while the message claimed "after 20".
                    if total_tools >= 20:
                        logger.info("   ⚠️ Breaking after 20 tools...")
                        break

    except Exception as e:
        logger.error(f"❌ Request failed: {e}")
        return

    # Summary report.
    logger.info("=" * 80)
    logger.info("📊 2-PHASE RETRIEVAL ANALYSIS")
    logger.info("=" * 80)
    logger.info(f"Phase 1 (Metadata) tools: {metadata_tools}")
    logger.info(f"Phase 2 (Content) tools: {content_tools}")
    logger.info(f"Total tools executed: {total_tools}")
    logger.info("-" * 60)

    # Each entry: (passed?, human-readable verdict line).
    success_criteria = [
        (metadata_tools > 0, f"Phase 1 metadata retrieval: {'✅' if metadata_tools > 0 else '❌'} ({metadata_tools} tools)"),
        (content_tools > 0, f"Phase 2 content retrieval: {'✅' if content_tools > 0 else '❌'} ({content_tools} tools)"),
        (total_tools >= 2, f"Multi-tool execution: {'✅' if total_tools >= 2 else '❌'} ({total_tools} tools)"),
    ]

    logger.info("✅ SUCCESS CRITERIA:")
    for _, message in success_criteria:
        logger.info(f"  {message}")
    all_passed = all(passed for passed, _ in success_criteria)

    if all_passed:
        logger.info("🎉 2-PHASE RETRIEVAL TEST PASSED!")
        logger.info("  ✅ Agent correctly uses both metadata and content retrieval tools")
    else:
        logger.info("❌ 2-PHASE RETRIEVAL TEST FAILED!")
        if metadata_tools == 0:
            logger.info("  ❌ No metadata retrieval tools used")
        if content_tools == 0:
            logger.info("  ❌ No content retrieval tools used - this is the main issue!")
|
||||
|
||||
if __name__ == "__main__":
    # Manual entry point: run the smoke test against a locally running service.
    asyncio.run(test_2phase_retrieval())
|
||||
372
vw-agentic-rag/tests/integration/test_api.py
Normal file
372
vw-agentic-rag/tests/integration/test_api.py
Normal file
@@ -0,0 +1,372 @@
|
||||
"""
|
||||
Remote Integration Tests for Agentic RAG API
|
||||
|
||||
These tests connect to a running service instance remotely to validate:
|
||||
- API endpoints and responses
|
||||
- Request/response schemas
|
||||
- Basic functionality without external dependencies
|
||||
"""
|
||||
import pytest
|
||||
import asyncio
|
||||
import json
|
||||
import httpx
|
||||
from typing import Optional, Dict, Any
|
||||
import time
|
||||
import os
|
||||
|
||||
|
||||
# Remote service connection settings; override the default target via the
# AGENTIC_RAG_SERVICE_URL environment variable.
DEFAULT_SERVICE_URL = "http://127.0.0.1:8000"
SERVICE_URL = os.environ.get("AGENTIC_RAG_SERVICE_URL", DEFAULT_SERVICE_URL)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def service_url() -> str:
    """Session-scoped fixture returning the base URL of the service under test."""
    return SERVICE_URL
|
||||
|
||||
|
||||
class TestBasicAPI:
    """Smoke tests for the service's basic, non-chat HTTP endpoints."""

    @staticmethod
    async def _get(service_url: str, path: str) -> httpx.Response:
        # Shared GET helper: fresh client per call, 30 s timeout.
        async with httpx.AsyncClient(timeout=30.0) as client:
            return await client.get(f"{service_url}{path}")

    @pytest.mark.asyncio
    async def test_health_endpoint(self, service_url: str):
        """The /health endpoint reports a healthy agentic-rag service."""
        response = await self._get(service_url, "/health")
        assert response.status_code == 200

        body = response.json()
        assert body["status"] == "healthy"
        assert body["service"] == "agentic-rag"

    @pytest.mark.asyncio
    async def test_root_endpoint(self, service_url: str):
        """The root endpoint returns a greeting naming the API."""
        response = await self._get(service_url, "/")
        assert response.status_code == 200

        body = response.json()
        assert "message" in body
        assert "Agentic RAG API" in body["message"]

    @pytest.mark.asyncio
    async def test_openapi_docs(self, service_url: str):
        """The OpenAPI schema is served and carries the expected title."""
        response = await self._get(service_url, "/openapi.json")
        assert response.status_code == 200

        body = response.json()
        assert "openapi" in body
        assert "info" in body
        assert body["info"]["title"] == "Agentic RAG API"

    @pytest.mark.asyncio
    async def test_docs_endpoint(self, service_url: str):
        """The Swagger UI page is served as HTML."""
        response = await self._get(service_url, "/docs")
        assert response.status_code == 200
        assert "text/html" in response.headers["content-type"]
|
||||
|
||||
|
||||
class TestChatAPI:
    """Tests for the chat endpoints using well-formed requests."""

    def _create_chat_request(self, message: str, session_id: Optional[str] = None) -> Dict[str, Any]:
        """Build a minimal valid chat payload carrying a single user message."""
        return {
            "session_id": session_id or f"test_session_{int(time.time())}",
            "messages": [{"role": "user", "content": message}],
        }

    @staticmethod
    async def _post(client: httpx.AsyncClient, url: str, payload: Dict[str, Any]) -> httpx.Response:
        # Shared JSON POST helper with an explicit content-type header.
        return await client.post(url, json=payload, headers={"Content-Type": "application/json"})

    @pytest.mark.asyncio
    async def test_chat_endpoint_basic_request(self, service_url: str):
        """A basic chat request is accepted and streams back a response."""
        payload = self._create_chat_request("Hello, can you help me?")

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await self._post(client, f"{service_url}/api/chat", payload)

        assert response.status_code == 200
        # The endpoint streams either SSE or plain text.
        content_type = response.headers.get("content-type", "")
        assert "text/event-stream" in content_type or "text/plain" in content_type

    @pytest.mark.asyncio
    async def test_ai_sdk_chat_endpoint_basic_request(self, service_url: str):
        """The AI-SDK-compatible chat endpoint streams plain text."""
        payload = self._create_chat_request("What is ISO 26262?")

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await self._post(client, f"{service_url}/api/ai-sdk/chat", payload)

        assert response.status_code == 200
        assert "text/plain" in response.headers.get("content-type", "")

    @pytest.mark.asyncio
    async def test_chat_endpoint_invalid_request(self, service_url: str):
        """Malformed payloads are rejected with a 422 validation error."""
        bad_payloads = [
            {},  # Empty request
            {"session_id": "test"},  # Missing messages
            {"messages": []},  # Missing session_id
            {"session_id": "test", "messages": [{"role": "invalid"}]},  # Invalid message format
        ]

        async with httpx.AsyncClient(timeout=30.0) as client:
            for bad in bad_payloads:
                response = await self._post(client, f"{service_url}/api/chat", bad)
                assert response.status_code == 422

    @pytest.mark.asyncio
    async def test_session_persistence(self, service_url: str):
        """Two requests sharing a session_id are both accepted."""
        session_id = f"persistent_session_{int(time.time())}"

        async with httpx.AsyncClient(timeout=30.0) as client:
            first = self._create_chat_request("My name is John", session_id)
            response1 = await self._post(client, f"{service_url}/api/chat", first)
            assert response1.status_code == 200

            # Give the service a moment to store the first turn.
            await asyncio.sleep(1)

            follow_up = self._create_chat_request("What did I just tell you my name was?", session_id)
            response2 = await self._post(client, f"{service_url}/api/chat", follow_up)
            assert response2.status_code == 200
|
||||
|
||||
|
||||
class TestRequestValidation:
    """Validation and error handling for malformed or unusual requests."""

    @pytest.mark.asyncio
    async def test_malformed_json(self, service_url: str):
        """A syntactically invalid JSON body yields a 422."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{service_url}/api/chat",
                content="invalid json{",
                headers={"Content-Type": "application/json"},
            )
        assert response.status_code == 422

    @pytest.mark.asyncio
    async def test_missing_content_type(self, service_url: str):
        """A JSON body sent without a Content-Type header is rejected gracefully."""
        body = {
            "session_id": "test_session",
            "messages": [{"role": "user", "content": "test"}],
        }

        async with httpx.AsyncClient(timeout=30.0) as client:
            # Deliberately omit the Content-Type header.
            response = await client.post(
                f"{service_url}/api/chat",
                content=json.dumps(body),
            )
        # FastAPI should reject this as unsupported media / validation error.
        assert response.status_code in [415, 422]

    @pytest.mark.asyncio
    async def test_oversized_request(self, service_url: str):
        """A very large (100 KB) message is processed or rejected gracefully."""
        body = {
            "session_id": "test_session",
            "messages": [{"role": "user", "content": "x" * 100000}],
        }

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{service_url}/api/chat",
                json=body,
                headers={"Content-Type": "application/json"},
            )
        # Accepted, payload-too-large, or validation error are all acceptable.
        assert response.status_code in [200, 413, 422]
|
||||
|
||||
|
||||
class TestCORSAndHeaders:
    """CORS preflight handling and basic response headers."""

    @pytest.mark.asyncio
    async def test_cors_headers(self, service_url: str):
        """Preflight is answered and actual responses carry CORS headers."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            preflight = await client.options(
                f"{service_url}/api/chat",
                headers={
                    "Origin": "http://localhost:3000",
                    "Access-Control-Request-Method": "POST",
                    "Access-Control-Request-Headers": "Content-Type",
                },
            )
            # The preflight must be handled, not 404/405'd.
            assert preflight.status_code in [200, 204]

            # Follow up with a real cross-origin request.
            payload = {
                "session_id": "cors_test",
                "messages": [{"role": "user", "content": "test"}],
            }
            actual = await client.post(
                f"{service_url}/api/chat",
                json=payload,
                headers={
                    "Content-Type": "application/json",
                    "Origin": "http://localhost:3000",
                },
            )

            assert actual.status_code == 200
            # The response must echo back an allow-origin header.
            assert "access-control-allow-origin" in actual.headers

    @pytest.mark.asyncio
    async def test_security_headers(self, service_url: str):
        """Responses include the standard HTTP framing headers."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(f"{service_url}/health")
        assert response.status_code == 200

        # Exact security headers depend on deployment configuration; only
        # require the framing headers FastAPI always sets.
        headers = response.headers
        assert "content-length" in headers or "transfer-encoding" in headers
|
||||
|
||||
|
||||
class TestErrorHandling:
    """Error handling for unknown routes, wrong methods, and timeouts."""

    @pytest.mark.asyncio
    async def test_nonexistent_endpoint(self, service_url: str):
        """Unknown paths return 404."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(f"{service_url}/nonexistent")
        assert response.status_code == 404

    @pytest.mark.asyncio
    async def test_method_not_allowed(self, service_url: str):
        """GET on the POST-only chat endpoint returns 405."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(f"{service_url}/api/chat")  # GET instead of POST
        assert response.status_code == 405

    @pytest.mark.asyncio
    async def test_timeout_handling(self, service_url: str):
        """Client-side timeouts surface as httpx.TimeoutException (or the call succeeds)."""
        # A 1 ms timeout: either the request times out, or the service is
        # fast enough that it completes normally -- both are acceptable.
        async with httpx.AsyncClient(timeout=0.001) as impatient_client:
            try:
                response = await impatient_client.get(f"{service_url}/health")
            except httpx.TimeoutException:
                pass  # Expected timeout - this is fine
            else:
                assert response.status_code == 200
|
||||
|
||||
|
||||
class TestServiceIntegration:
    """Integration checks that exercise real service features."""

    @staticmethod
    async def _read_some(response: httpx.Response, min_chars: int) -> str:
        # Drain the streamed body until at least `min_chars` characters
        # arrive (or the stream ends).
        collected = ""
        async for chunk in response.aiter_text():
            collected += chunk
            if len(collected) > min_chars:
                break
        return collected

    @pytest.mark.asyncio
    async def test_manufacturing_standards_query(self, service_url: str):
        """A standards question yields streamed content from the AI SDK endpoint."""
        payload = {
            "session_id": f"standards_test_{int(time.time())}",
            "messages": [
                {
                    "role": "user",
                    "content": "What are the key safety requirements in ISO 26262?",
                }
            ],
        }

        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{service_url}/api/ai-sdk/chat",
                json=payload,
                headers={"Content-Type": "application/json"},
            )

            assert response.status_code == 200

            # Read just enough of the stream to confirm it produces output.
            content = await self._read_some(response, 100)

        assert len(content) > 0

    @pytest.mark.asyncio
    async def test_general_conversation(self, service_url: str):
        """A general greeting produces a streaming chat response."""
        payload = {
            "session_id": f"general_test_{int(time.time())}",
            "messages": [
                {
                    "role": "user",
                    "content": "Hello! How can you help me today?",
                }
            ],
        }

        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{service_url}/api/chat",
                json=payload,
                headers={"Content-Type": "application/json"},
            )

            assert response.status_code == 200

            # Collect a handful of chunks to verify streaming works.
            pieces = []
            async for chunk in response.aiter_text():
                pieces.append(chunk)
                if len(pieces) > 10:
                    break
            content = "".join(pieces)

        assert len(content) > 0
|
||||
415
vw-agentic-rag/tests/integration/test_e2e_tool_ui.py
Normal file
415
vw-agentic-rag/tests/integration/test_e2e_tool_ui.py
Normal file
@@ -0,0 +1,415 @@
|
||||
"""
|
||||
End-to-End Integration Tests for Tool UI
|
||||
|
||||
These tests validate the complete user experience by connecting to a running service.
|
||||
They test tool calling, response formatting, and user interface integration.
|
||||
"""
|
||||
import pytest
|
||||
import asyncio
|
||||
import httpx
|
||||
import time
|
||||
import os
|
||||
|
||||
|
||||
# Remote service connection settings; override the default target via the
# AGENTIC_RAG_SERVICE_URL environment variable.
DEFAULT_SERVICE_URL = "http://127.0.0.1:8000"
SERVICE_URL = os.environ.get("AGENTIC_RAG_SERVICE_URL", DEFAULT_SERVICE_URL)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def service_url() -> str:
    """Session-scoped fixture returning the base URL of the service under test."""
    return SERVICE_URL
|
||||
|
||||
|
||||
class TestEndToEndWorkflows:
    """Complete end-to-end user workflows against a running service."""

    @staticmethod
    def _request(session_id: str, content: str) -> dict:
        # Single-turn chat payload.
        return {
            "session_id": session_id,
            "messages": [{"role": "user", "content": content}],
        }

    @staticmethod
    async def _post_and_read(endpoint: str, payload: dict, limit: int) -> str:
        # POST the payload, require a 200, then drain the stream until
        # more than `limit` characters have arrived (or it ends).
        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(
                endpoint,
                json=payload,
                headers={"Content-Type": "application/json"},
            )
            assert response.status_code == 200

            text = ""
            async for chunk in response.aiter_text():
                text += chunk
                if len(text) > limit:
                    break
        return text

    @pytest.mark.asyncio
    async def test_standards_research_with_tools(self, service_url: str):
        """Standards research via /api/chat returns substantial content."""
        session_id = f"e2e_standards_{int(time.time())}"
        payload = self._request(
            session_id,
            "What are the safety requirements for automotive braking systems according to ISO 26262?",
        )

        full_content = await self._post_and_read(f"{service_url}/api/chat", payload, 1000)

        assert len(full_content) > 100
        print(f"Standards research response length: {len(full_content)} chars")

    @pytest.mark.asyncio
    async def test_manufacturing_compliance_workflow(self, service_url: str):
        """Compliance workflow via the AI SDK endpoint yields content."""
        session_id = f"e2e_compliance_{int(time.time())}"
        payload = self._request(
            session_id,
            "I need to understand compliance requirements for manufacturing equipment safety. What standards apply?",
        )

        content = await self._post_and_read(f"{service_url}/api/ai-sdk/chat", payload, 500)

        assert len(content) > 50
        print(f"Compliance workflow response length: {len(content)} chars")

    @pytest.mark.asyncio
    async def test_technical_documentation_workflow(self, service_url: str):
        """Technical documentation research via /api/chat yields content."""
        session_id = f"e2e_technical_{int(time.time())}"
        payload = self._request(
            session_id,
            "How do I implement functional safety according to IEC 61508 for industrial control systems?",
        )

        content = await self._post_and_read(f"{service_url}/api/chat", payload, 800)

        assert len(content) > 100
        print(f"Technical documentation response length: {len(content)} chars")
|
||||
|
||||
|
||||
class TestMultiTurnConversations:
    """Multi-turn conversation workflows sharing a single session."""

    @pytest.mark.asyncio
    async def test_progressive_standards_exploration(self, service_url: str):
        """Four increasingly specific questions in one session all stream content."""
        session_id = f"e2e_progressive_{int(time.time())}"

        questions = [
            "What is ISO 26262?",
            "What are the ASIL levels?",
            "How do I determine ASIL D requirements?",
            "What testing is required for ASIL D systems?",
        ]

        async with httpx.AsyncClient(timeout=90.0) as client:
            for turn, question in enumerate(questions):
                payload = {
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": question}],
                }

                response = await client.post(
                    f"{service_url}/api/chat",
                    json=payload,
                    headers={"Content-Type": "application/json"},
                )
                assert response.status_code == 200

                # Drain just enough of the stream to confirm output.
                text = ""
                async for chunk in response.aiter_text():
                    text += chunk
                    if len(text) > 300:
                        break

                assert len(text) > 30
                print(f"Turn {turn + 1}: {len(text)} chars")

                # Brief pause between turns.
                await asyncio.sleep(1)

    @pytest.mark.asyncio
    async def test_comparative_analysis_workflow(self, service_url: str):
        """Comparative questions across standards stream content via the AI SDK."""
        session_id = f"e2e_comparative_{int(time.time())}"

        questions = [
            "What are the differences between ISO 26262 and IEC 61508?",
            "Which standard is more appropriate for automotive applications?",
            "How do the safety integrity levels compare between these standards?",
        ]

        async with httpx.AsyncClient(timeout=90.0) as client:
            for question in questions:
                payload = {
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": question}],
                }

                response = await client.post(
                    f"{service_url}/api/ai-sdk/chat",
                    json=payload,
                    headers={"Content-Type": "application/json"},
                )
                assert response.status_code == 200

                text = ""
                async for chunk in response.aiter_text():
                    text += chunk
                    if len(text) > 400:
                        break

                assert len(text) > 50
                await asyncio.sleep(1.5)
|
||||
|
||||
|
||||
class TestSpecializedQueries:
    """Specialized query types and edge cases."""

    @staticmethod
    async def _ask(endpoint: str, session_id: str, question: str, limit: int) -> str:
        # Send one question, require a 200, and drain the stream until more
        # than `limit` characters have arrived (or it ends).
        payload = {
            "session_id": session_id,
            "messages": [{"role": "user", "content": question}],
        }

        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(
                endpoint,
                json=payload,
                headers={"Content-Type": "application/json"},
            )
            assert response.status_code == 200

            text = ""
            async for chunk in response.aiter_text():
                text += chunk
                if len(text) > limit:
                    break
        return text

    @pytest.mark.asyncio
    async def test_specific_standard_section_query(self, service_url: str):
        """A question about one section of a standard streams content."""
        content = await self._ask(
            f"{service_url}/api/chat",
            f"e2e_specific_{int(time.time())}",
            "What does section 4.3 of ISO 26262-3 say about software architectural design?",
            600,
        )
        assert len(content) > 50

    @pytest.mark.asyncio
    async def test_implementation_guidance_query(self, service_url: str):
        """A how-to implementation question streams content via the AI SDK."""
        content = await self._ask(
            f"{service_url}/api/ai-sdk/chat",
            f"e2e_implementation_{int(time.time())}",
            "How should I implement a safety management system according to ISO 45001?",
            500,
        )
        assert len(content) > 100

    @pytest.mark.asyncio
    async def test_cross_domain_standards_query(self, service_url: str):
        """A cross-domain (security vs. safety) question streams content."""
        content = await self._ask(
            f"{service_url}/api/chat",
            f"e2e_cross_domain_{int(time.time())}",
            "How do cybersecurity standards like ISO 27001 relate to functional safety standards like ISO 26262?",
            700,
        )
        assert len(content) > 100
|
||||
|
||||
|
||||
class TestUserExperience:
    """Overall user-experience aspects: response quality and error recovery."""

    @pytest.mark.asyncio
    async def test_response_quality_indicators(self, service_url: str):
        """Responses are long and word-rich enough to suggest good structure."""
        payload = {
            "session_id": f"e2e_quality_{int(time.time())}",
            "messages": [
                {
                    "role": "user",
                    "content": "What are the key principles of risk assessment in ISO 31000?",
                }
            ],
        }

        async with httpx.AsyncClient(timeout=90.0) as client:
            response = await client.post(
                f"{service_url}/api/chat",
                json=payload,
                headers={"Content-Type": "application/json"},
            )
            assert response.status_code == 200

            # Collect a substantial prefix of the stream for analysis.
            full_content = ""
            async for chunk in response.aiter_text():
                full_content += chunk
                if len(full_content) > 1200:
                    break

        # Basic quality heuristics: non-trivial length and word count.
        assert len(full_content) > 100
        assert len(full_content.split()) > 20  # At least 20 words

        print(f"Quality response length: {len(full_content)} chars")

    @pytest.mark.asyncio
    async def test_error_recovery_experience(self, service_url: str):
        """A bad request mid-session does not break subsequent good turns."""
        session_id = f"e2e_error_recovery_{int(time.time())}"
        json_headers = {"Content-Type": "application/json"}

        async with httpx.AsyncClient(timeout=90.0) as client:
            # Start with a good question.
            good_request = {
                "session_id": session_id,
                "messages": [{"role": "user", "content": "What is ISO 9001?"}],
            }
            response = await client.post(
                f"{service_url}/api/chat", json=good_request, headers=json_headers
            )
            assert response.status_code == 200

            await asyncio.sleep(1)

            # A potentially problematic request: empty message content.
            try:
                problematic_request = {
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": ""}],
                }
                await client.post(
                    f"{service_url}/api/chat", json=problematic_request, headers=json_headers
                )
            except Exception:
                pass  # Expected to potentially fail

            await asyncio.sleep(1)

            # Recovery with another good question.
            recovery_request = {
                "session_id": session_id,
                "messages": [
                    {"role": "user", "content": "Can you help me understand quality management?"}
                ],
            }
            recovery_response = await client.post(
                f"{service_url}/api/chat", json=recovery_request, headers=json_headers
            )

            # Should recover successfully.
            assert recovery_response.status_code == 200

            text = ""
            async for chunk in recovery_response.aiter_text():
                text += chunk
                if len(text) > 200:
                    break

            assert len(text) > 30
            print("📤 Sending to backend...")
|
||||
402
vw-agentic-rag/tests/integration/test_full_workflow.py
Normal file
402
vw-agentic-rag/tests/integration/test_full_workflow.py
Normal file
@@ -0,0 +1,402 @@
|
||||
"""
|
||||
Full Workflow Integration Tests
|
||||
|
||||
These tests validate complete end-to-end workflows by connecting to a running service.
|
||||
They test realistic user scenarios and complex interactions.
|
||||
"""
|
||||
import pytest
|
||||
import asyncio
|
||||
import httpx
|
||||
import time
|
||||
import os
|
||||
from typing import List, Dict, Any
|
||||
|
||||
|
||||
# Configuration for remote service connection
|
||||
DEFAULT_SERVICE_URL = "http://127.0.0.1:8000"
|
||||
SERVICE_URL = os.getenv("AGENTIC_RAG_SERVICE_URL", DEFAULT_SERVICE_URL)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def service_url() -> str:
    """Get the service URL for testing.

    Resolved once per test session from the module-level SERVICE_URL,
    which reads the AGENTIC_RAG_SERVICE_URL environment variable and
    falls back to the local default.
    """
    return SERVICE_URL
|
||||
|
||||
|
||||
class TestCompleteWorkflows:
    """Exercise complete multi-turn user workflows against a running service."""

    @staticmethod
    def _turn(session_id: str, text: str) -> dict:
        # One user message in the chat request schema.
        return {
            "session_id": session_id,
            "messages": [{"role": "user", "content": text}],
        }

    @staticmethod
    async def _drain(response, threshold: int) -> str:
        # Accumulate streamed text until more than `threshold` chars arrive.
        collected = ""
        async for piece in response.aiter_text():
            collected += piece
            if len(collected) > threshold:
                break
        return collected

    @pytest.mark.asyncio
    async def test_standards_research_workflow(self, service_url: str):
        """Walk a user's ISO 26262 research conversation turn by turn."""
        session_id = f"standards_workflow_{int(time.time())}"

        questions = [
            "What is ISO 26262 and what does it cover?",
            "What are the ASIL levels in ISO 26262?",
            "Can you explain ASIL D requirements in detail?",
            "How does ISO 26262 relate to vehicle cybersecurity?",
        ]

        async with httpx.AsyncClient(timeout=60.0) as client:
            for turn_index, question in enumerate(questions):
                reply = await client.post(
                    f"{service_url}/api/ai-sdk/chat",
                    json=self._turn(session_id, question),
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                body = await self._drain(reply, 200)
                assert len(body) > 50
                print(f"Question {turn_index+1} response length: {len(body)} chars")

                # Brief pause between turns, as a real user would take.
                await asyncio.sleep(0.5)

    @pytest.mark.asyncio
    async def test_manufacturing_safety_workflow(self, service_url: str):
        """Run a manufacturing-safety Q&A and check every turn answers."""
        session_id = f"manufacturing_workflow_{int(time.time())}"

        questions = [
            "What are the key safety standards for manufacturing equipment?",
            "How do ISO 13849 and IEC 62061 compare?",
            "What is the process for safety risk assessment in manufacturing?",
        ]

        async with httpx.AsyncClient(timeout=60.0) as client:
            answers = []
            for question in questions:
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json=self._turn(session_id, question),
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                answers.append(await self._drain(reply, 300))
                await asyncio.sleep(0.5)

            # Every question must have produced a non-trivial answer.
            assert len(answers) == len(questions)
            for answer in answers:
                assert len(answer) > 30

    @pytest.mark.asyncio
    async def test_session_context_continuity(self, service_url: str):
        """Verify follow-up questions in one session still get answered."""
        session_id = f"context_test_{int(time.time())}"

        async with httpx.AsyncClient(timeout=60.0) as client:
            # First turn establishes a concrete engineering scenario.
            first = await client.post(
                f"{service_url}/api/chat",
                json=self._turn(
                    session_id,
                    "I'm working on a safety system for automotive braking. What standard should I follow?",
                ),
                headers={"Content-Type": "application/json"},
            )
            assert first.status_code == 200

            # Give the service time to finish processing the first turn.
            await asyncio.sleep(2)

            # Follow-up that only makes sense with the prior context.
            second = await client.post(
                f"{service_url}/api/chat",
                json=self._turn(session_id, "What are the specific testing requirements for this standard?"),
                headers={"Content-Type": "application/json"},
            )
            assert second.status_code == 200

            # Both answers must carry meaningful content.
            first_body = await self._drain(first, 100)
            second_body = await self._drain(second, 100)

            assert len(first_body) > 50
            assert len(second_body) > 50
|
||||
|
||||
|
||||
class TestErrorRecoveryWorkflows:
    """Error recovery and edge-case workflows."""

    @pytest.mark.asyncio
    async def test_session_recovery_after_error(self, service_url: str):
        """A session must keep working after a problematic request."""
        session_id = f"error_recovery_{int(time.time())}"
        headers = {"Content-Type": "application/json"}

        async with httpx.AsyncClient(timeout=60.0) as client:
            # Known-good request first.
            ok = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": "What is ISO 9001?"}],
                },
                headers=headers,
            )
            assert ok.status_code == 200

            # An empty message may be rejected or raise; either is acceptable.
            try:
                await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": ""}],
                    },
                    headers=headers,
                )
            except Exception:
                pass  # Expected to potentially fail

            await asyncio.sleep(1)

            # The session itself must still answer afterwards.
            recovered = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": "Can you summarize what we discussed?"}],
                },
                headers=headers,
            )
            assert recovered.status_code == 200

    @pytest.mark.asyncio
    async def test_concurrent_sessions(self, service_url: str):
        """Three independent sessions may run at the same time."""
        stamp = int(time.time())
        session_ids = [f"concurrent_{stamp}_{i}" for i in range(3)]
        questions = [
            "What is ISO 27001?",
            "What is NIST Cybersecurity Framework?",
            "What is GDPR compliance?",
        ]

        async def run_one(session_id: str, question: str):
            # Each session gets its own client, mirroring separate users.
            async with httpx.AsyncClient(timeout=60.0) as client:
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": question}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200
                return session_id

        outcomes = await asyncio.gather(
            *(run_one(sid, q) for sid, q in zip(session_ids, questions)),
            return_exceptions=True,
        )

        # All sessions should complete without raising.
        assert len(outcomes) == 3
        for outcome in outcomes:
            assert not isinstance(outcome, Exception)
|
||||
|
||||
|
||||
class TestPerformanceWorkflows:
    """Performance-oriented workflows."""

    @pytest.mark.asyncio
    async def test_rapid_fire_requests(self, service_url: str):
        """Back-to-back requests in one session must all succeed."""
        session_id = f"rapid_fire_{int(time.time())}"
        prompts = ["Hello", "What is ISO 14001?", "Thank you", "Goodbye"]

        async with httpx.AsyncClient(timeout=60.0) as client:
            for index, prompt in enumerate(prompts):
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": prompt}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200
                print(f"Rapid request {index+1} completed")

                # Deliberately minimal pause to stress the service.
                await asyncio.sleep(0.1)

    @pytest.mark.asyncio
    async def test_large_context_workflow(self, service_url: str):
        """Context built up over many turns must keep being accepted."""
        session_id = f"large_context_{int(time.time())}"

        turns = [
            "I need to understand automotive safety standards",
            "Specifically, tell me about ISO 26262 functional safety",
            "What are the different ASIL levels and their requirements?",
            "How do I implement ASIL D for a braking system?",
            "What testing and validation is required for ASIL D?",
            "Can you provide a summary of everything we've discussed?",
        ]

        async with httpx.AsyncClient(timeout=60.0) as client:
            for index, message in enumerate(turns):
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": message}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200
                print(f"Context turn {index+1} completed")

                # Allow the service to fold this turn into the session context.
                await asyncio.sleep(1)
|
||||
|
||||
|
||||
class TestRealWorldScenarios:
    """Realistic user personas and their question flows."""

    @pytest.mark.asyncio
    async def test_compliance_officer_scenario(self, service_url: str):
        """Mimic a compliance officer working through regulatory questions."""
        session_id = f"compliance_officer_{int(time.time())}"

        questions = [
            "I need to ensure our new product meets regulatory requirements. What standards apply to automotive safety systems?",
            "Our system is classified as ASIL C. What does this mean for our development process?",
            "What documentation do we need to prepare for safety assessment?",
            "How often do we need to review and update our safety processes?",
        ]

        async with httpx.AsyncClient(timeout=90.0) as client:
            for step, question in enumerate(questions):
                reply = await client.post(
                    f"{service_url}/api/ai-sdk/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": question}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                # Realistic think-time between questions.
                await asyncio.sleep(2)
                print(f"Compliance scenario step {step+1} completed")

    @pytest.mark.asyncio
    async def test_engineer_research_scenario(self, service_url: str):
        """Mimic an engineer drilling into technical standard details."""
        session_id = f"engineer_research_{int(time.time())}"

        questions = [
            "I'm designing a safety-critical system. What's the difference between ISO 26262 and IEC 61508?",
            "For automotive applications, which standard takes precedence?",
            "What are the specific requirements for software development under ISO 26262?",
            "Can you explain the V-model development process required by the standard?",
        ]

        async with httpx.AsyncClient(timeout=90.0) as client:
            for question in questions:
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": question}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                # Pull enough of the answer to confirm real content.
                body = ""
                async for piece in reply.aiter_text():
                    body += piece
                    if len(body) > 150:
                        break

                assert len(body) > 50
                await asyncio.sleep(1.5)
|
||||
406
vw-agentic-rag/tests/integration/test_streaming_integration.py
Normal file
406
vw-agentic-rag/tests/integration/test_streaming_integration.py
Normal file
@@ -0,0 +1,406 @@
|
||||
"""
|
||||
Streaming Integration Tests
|
||||
|
||||
These tests validate streaming behavior by connecting to a running service.
|
||||
They focus on real-time response patterns and streaming event handling.
|
||||
"""
|
||||
import pytest
|
||||
import asyncio
|
||||
import httpx
|
||||
import time
|
||||
import os
|
||||
|
||||
|
||||
# Configuration for remote service connection
|
||||
DEFAULT_SERVICE_URL = "http://127.0.0.1:8000"
|
||||
SERVICE_URL = os.getenv("AGENTIC_RAG_SERVICE_URL", DEFAULT_SERVICE_URL)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def service_url() -> str:
    """Get the service URL for testing.

    Resolved once per test session from the module-level SERVICE_URL,
    which reads the AGENTIC_RAG_SERVICE_URL environment variable and
    falls back to the local default.
    """
    return SERVICE_URL
|
||||
|
||||
|
||||
class TestStreamingBehavior:
    """Streaming response behavior of the chat endpoints.

    NOTE(review): `client.post` in httpx reads the whole body before
    returning, so iterating `aiter_text()` here chunks the buffered body
    rather than observing network timing; `client.stream(...)` would test
    true incremental delivery — confirm which behavior is intended.
    """

    @pytest.mark.asyncio
    async def test_basic_streaming_response(self, service_url: str):
        """Responses should arrive as multiple chunks, not one empty blob."""
        session_id = f"streaming_test_{int(time.time())}"

        async with httpx.AsyncClient(timeout=60.0) as client:
            reply = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": "What is ISO 26262?"}],
                },
                headers={"Content-Type": "application/json"},
            )
            assert reply.status_code == 200

            # Collect a bounded number of chunks.
            pieces = []
            async for piece in reply.aiter_text():
                pieces.append(piece)
                if len(pieces) > 10:
                    break

            # Multiple chunks with non-empty combined content.
            assert len(pieces) > 1
            assert len("".join(pieces)) > 0

    @pytest.mark.asyncio
    async def test_ai_sdk_streaming_format(self, service_url: str):
        """AI-SDK endpoint streams plain text in multiple chunks."""
        session_id = f"ai_sdk_streaming_{int(time.time())}"

        async with httpx.AsyncClient(timeout=60.0) as client:
            reply = await client.post(
                f"{service_url}/api/ai-sdk/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": "Explain vehicle safety testing"}],
                },
                headers={"Content-Type": "application/json"},
            )
            assert reply.status_code == 200
            assert "text/plain" in reply.headers.get("content-type", "")

            pieces = 0
            size = 0
            async for piece in reply.aiter_text():
                pieces += 1
                size += len(piece)
                if pieces > 15:
                    break

            # Multiple chunks carrying meaningful content.
            assert pieces > 1
            assert size > 50

    @pytest.mark.asyncio
    async def test_streaming_performance(self, service_url: str):
        """First chunk should arrive within a reasonable time budget."""
        session_id = f"streaming_perf_{int(time.time())}"

        async with httpx.AsyncClient(timeout=60.0) as client:
            started = time.time()

            reply = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": "What are automotive safety standards?"}],
                },
                headers={"Content-Type": "application/json"},
            )
            assert reply.status_code == 200

            first_seen = None
            seen = 0
            async for _ in reply.aiter_text():
                if first_seen is None:
                    first_seen = time.time()
                seen += 1
                if seen > 5:
                    break

            # Budget: under 10 seconds to the first chunk.
            if first_seen:
                assert first_seen - started < 10.0

    @pytest.mark.asyncio
    async def test_streaming_interruption_handling(self, service_url: str):
        """Stopping mid-stream must not prevent receiving early chunks."""
        session_id = f"streaming_interrupt_{int(time.time())}"

        async with httpx.AsyncClient(timeout=60.0) as client:
            reply = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": "Tell me about ISO standards"}],
                },
                headers={"Content-Type": "application/json"},
            )
            assert reply.status_code == 200

            received = 0
            async for _ in reply.aiter_text():
                received += 1
                if received >= 3:
                    break  # Abandon the stream on purpose.

            assert received > 0
|
||||
|
||||
|
||||
class TestConcurrentStreaming:
    """Concurrent streaming scenarios."""

    @pytest.mark.asyncio
    async def test_multiple_concurrent_streams(self, service_url: str):
        """Several streams in parallel must each deliver chunks."""
        stamp = int(time.time())

        async def one_stream(suffix: str, question: str):
            # Independent client per stream, like separate browser tabs.
            session_id = f"concurrent_stream_{stamp}_{suffix}"

            async with httpx.AsyncClient(timeout=60.0) as client:
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": question}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                received = 0
                async for _ in reply.aiter_text():
                    received += 1
                    if received > 5:
                        break
                return received

        questions = [
            "What is ISO 26262?",
            "Explain NIST framework",
            "What is GDPR?",
        ]

        outcomes = await asyncio.gather(
            *(one_stream(f"session_{i}", q) for i, q in enumerate(questions)),
            return_exceptions=True,
        )

        # All streams complete, none raise, each produces chunks.
        assert len(outcomes) == 3
        for outcome in outcomes:
            assert not isinstance(outcome, Exception)
            assert outcome > 0

    @pytest.mark.asyncio
    async def test_same_session_rapid_requests(self, service_url: str):
        """Rapid requests in one session all stream some content."""
        session_id = f"rapid_session_{int(time.time())}"
        prompts = ["Hello", "What is ISO 9001?", "Thank you"]

        async with httpx.AsyncClient(timeout=60.0) as client:
            for index, prompt in enumerate(prompts):
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": session_id,
                        "messages": [{"role": "user", "content": prompt}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                received = 0
                async for _ in reply.aiter_text():
                    received += 1
                    if received > 3:
                        break

                print(f"Request {index+1} completed with {received} chunks")

                # Very short delay before the next request.
                await asyncio.sleep(0.2)
|
||||
|
||||
|
||||
class TestStreamingErrorHandling:
    """Error handling during streaming."""

    @pytest.mark.asyncio
    async def test_streaming_with_invalid_session(self, service_url: str):
        """Odd session IDs must be handled (accepted or rejected) cleanly."""
        odd_session_ids = [
            "",                       # empty
            "a" * 1000,               # very long
            "session with spaces",    # whitespace
            "session/with/slashes",   # path-like special characters
        ]

        async with httpx.AsyncClient(timeout=60.0) as client:
            for session_id in odd_session_ids:
                try:
                    reply = await client.post(
                        f"{service_url}/api/chat",
                        json={
                            "session_id": session_id,
                            "messages": [{"role": "user", "content": "Hello"}],
                        },
                        headers={"Content-Type": "application/json"},
                    )
                    # Either accepted or rejected with a validation error.
                    assert reply.status_code in [200, 422]
                except Exception as e:
                    # Some edge cases may raise; record and continue.
                    print(f"Session ID '{session_id}' caused exception: {e}")

    @pytest.mark.asyncio
    async def test_streaming_with_large_messages(self, service_url: str):
        """Large (~3KB) prompts either stream or are rejected cleanly."""
        session_id = f"large_msg_stream_{int(time.time())}"
        big_prompt = "Please explain safety standards. " * 100

        async with httpx.AsyncClient(timeout=90.0) as client:
            reply = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": big_prompt}],
                },
                headers={"Content-Type": "application/json"},
            )

            # Accepted, payload-too-large, or validation error are all OK.
            assert reply.status_code in [200, 413, 422]

            if reply.status_code == 200:
                # If accepted, the stream must actually produce chunks.
                received = 0
                async for _ in reply.aiter_text():
                    received += 1
                    if received > 5:
                        break

                assert received > 0
|
||||
|
||||
|
||||
class TestStreamingContentValidation:
    """Streaming content quality and format."""

    @pytest.mark.asyncio
    async def test_streaming_content_encoding(self, service_url: str):
        """Streamed content must survive a UTF-8 round trip."""
        session_id = f"encoding_test_{int(time.time())}"

        # Prompt deliberately contains non-ASCII characters.
        accented_prompt = "What is ISO 26262? Please explain with émphasis on safety ñorms."

        async with httpx.AsyncClient(timeout=60.0) as client:
            reply = await client.post(
                f"{service_url}/api/chat",
                json={
                    "session_id": session_id,
                    "messages": [{"role": "user", "content": accented_prompt}],
                },
                headers={"Content-Type": "application/json"},
            )
            assert reply.status_code == 200

            body = ""
            async for piece in reply.aiter_text():
                body += piece
                if len(body) > 100:
                    break

            assert isinstance(body, str)
            assert len(body) > 0

            # Round-trip through UTF-8 must be lossless.
            assert body.encode('utf-8').decode('utf-8') == body

    @pytest.mark.asyncio
    async def test_streaming_response_consistency(self, service_url: str):
        """The same question in fresh sessions always yields content."""
        prefix = f"consistency_test_{int(time.time())}"
        question = "What is ISO 26262?"
        bodies = []

        async with httpx.AsyncClient(timeout=60.0) as client:
            for attempt in range(3):
                reply = await client.post(
                    f"{service_url}/api/chat",
                    json={
                        "session_id": f"{prefix}_{attempt}",
                        "messages": [{"role": "user", "content": question}],
                    },
                    headers={"Content-Type": "application/json"},
                )
                assert reply.status_code == 200

                body = ""
                async for piece in reply.aiter_text():
                    body += piece
                    if len(body) > 200:
                        break

                bodies.append(body)
                await asyncio.sleep(0.5)

        # Every run must produce substantial content.
        for body in bodies:
            assert len(body) > 50

        # None of the runs may be effectively empty.
        assert len([b for b in bodies if b.strip()]) == len(bodies)
|
||||
Reference in New Issue
Block a user