catonline_ai/vw-agentic-rag/service/retrieval/generic_chunk_retrieval.py

import logging


import time

from ..config import get_config
from service.retrieval.clients import AzureSearchClient
from service.retrieval.model import RetrievalResponse


logger = logging.getLogger(__name__)


class GenericChunkRetrieval:
    """Retrieve document chunks from the user-manual search index.

    Holds the application config returned by ``get_config()`` and an
    ``AzureSearchClient`` instance used to run the searches.
    """

    def __init__(self) -> None:
        """Load the configuration and create the search client."""
        self.config = get_config()
        self.search_client = AzureSearchClient()

    async def retrieve_doc_chunk(
        self,
        query: str,
        conversation_history: str = "",
        **kwargs
    ) -> RetrievalResponse:
        """Search CATOnline system user manual document chunks.

        Args:
            query: Text forwarded to the search client as ``search_text``.
            conversation_history: Currently unused by this method.
            **kwargs: Optional overrides. ``top_k`` (default ``10``) and
                ``score_threshold`` (default ``1.5``) are forwarded to the
                search client; any other keys are ignored.

        Returns:
            A ``RetrievalResponse`` whose ``results`` is the ``"value"``
            entry of the search response (an empty list when that entry is
            missing or null), ``took_ms`` is the elapsed time of this call
            in whole milliseconds, and ``total_count`` is ``len(results)``.

        Raises:
            Exception: Any error raised while searching or building the
                response is logged with its traceback and re-raised
                unchanged.
        """
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed (or go negative) when the system clock is adjusted.
        started = time.perf_counter()

        # Use the new Azure AI Search approach
        index_name = self.config.retrieval.index.chunk_user_manual_index
        vector_fields = "contentVector"
        select_fields = "content, title, full_headers"
        search_fields = "content, title, full_headers"

        top_k = kwargs.get("top_k", 10)
        score_threshold = kwargs.get("score_threshold", 1.5)

        try:
            response_data = await self.search_client.search_azure_ai(
                index_name=index_name,
                search_text=query,
                vector_fields=vector_fields,
                select_fields=select_fields,
                search_fields=search_fields,
                top_k=top_k,
                score_threshold=score_threshold,
            )

            # `.get("value", [])` would still yield None for an explicit
            # null entry and break len(); normalise that case to a list.
            results = response_data.get("value") or []

            took_ms = int((time.perf_counter() - started) * 1000)
            return RetrievalResponse(
                results=results,
                took_ms=took_ms,
                total_count=len(results),
            )
        except Exception as e:
            # logger.exception keeps the same message at ERROR level and
            # additionally records the traceback before re-raising.
            logger.exception("retrieve_doc_chunk_user_manual failed: %s", e)
            raise
init 2025-09-26 17:15:54 +08:00			`import logging`


			`import time`

			`from ..config import get_config`
			`from service.retrieval.clients import AzureSearchClient`
			`from service.retrieval.model import RetrievalResponse`


			`logger = logging.getLogger(__name__)`


			`class GenericChunkRetrieval:`
			`def __init__(self)->None:`
			`self.config = get_config()`
			`self.search_client = AzureSearchClient()`

			`async def retrieve_doc_chunk(`
			`self,`
			`query: str,`
			`conversation_history: str = "",`
			`**kwargs`
			`) -> RetrievalResponse:`
			`"""Search CATOnline system user manual document chunks"""`
			`start_time = time.time()`

			`# Use the new Azure AI Search approach`
			`index_name = self.config.retrieval.index.chunk_user_manual_index`
			`vector_fields = "contentVector"`
			`select_fields = "content, title, full_headers"`
			`search_fields = "content, title, full_headers"`

			`top_k = kwargs.get("top_k", 10)`
			`score_threshold = kwargs.get("score_threshold", 1.5)`

			`try:`
			`response_data = await self.search_client.search_azure_ai(`
			`index_name=index_name,`
			`search_text=query,`
			`vector_fields=vector_fields,`
			`select_fields=select_fields,`
			`search_fields=search_fields,`
			`top_k=top_k,`
			`score_threshold=score_threshold`
			`)`

			`results = response_data.get("value", [])`

			`took_ms = int((time.time() - start_time) * 1000)`
			`return RetrievalResponse(`
			`results=results,`
			`took_ms=took_ms,`
			`total_count=len(results)`
			`)`
			`except Exception as e:`
			`logger.error(f"retrieve_doc_chunk_user_manual failed: {e}")`
			`raise`