init
This commit is contained in:
58
vw-agentic-rag/service/retrieval/generic_chunk_retrieval.py
Normal file
58
vw-agentic-rag/service/retrieval/generic_chunk_retrieval.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import logging
|
||||
|
||||
|
||||
import time
|
||||
|
||||
from ..config import get_config
|
||||
from service.retrieval.clients import AzureSearchClient
|
||||
from service.retrieval.model import RetrievalResponse
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GenericChunkRetrieval:
    """Retrieve document chunks from the user-manual Azure AI Search index.

    The index name is read from the application configuration
    (``config.retrieval.index.chunk_user_manual_index``) and queries are
    issued through an ``AzureSearchClient``.
    """

    def __init__(self) -> None:
        """Load the application config and create the search client."""
        self.config = get_config()
        self.search_client = AzureSearchClient()

    async def retrieve_doc_chunk(
        self,
        query: str,
        conversation_history: str = "",
        **kwargs
    ) -> RetrievalResponse:
        """Search CATOnline system user manual document chunks.

        Args:
            query: Search text sent to the index.
            conversation_history: Accepted for interface compatibility; it is
                not used by this method.
            **kwargs: Optional tuning values. ``top_k`` (default ``10``) and
                ``score_threshold`` (default ``1.5``) are forwarded to the
                search client; any other keys are ignored.

        Returns:
            A ``RetrievalResponse`` holding the entries of the response's
            ``"value"`` field, the elapsed time in milliseconds and the
            number of entries returned.

        Raises:
            Exception: Any error raised while searching or building the
                response is logged with its traceback and re-raised.
        """
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # or jump if the system clock is adjusted mid-request.
        started = time.perf_counter()

        # Use the new Azure AI Search approach
        index_name = self.config.retrieval.index.chunk_user_manual_index
        vector_fields = "contentVector"
        select_fields = "content, title, full_headers"
        search_fields = "content, title, full_headers"

        top_k = kwargs.get("top_k", 10)
        score_threshold = kwargs.get("score_threshold", 1.5)

        try:
            response_data = await self.search_client.search_azure_ai(
                index_name=index_name,
                search_text=query,
                vector_fields=vector_fields,
                select_fields=select_fields,
                search_fields=search_fields,
                top_k=top_k,
                score_threshold=score_threshold,
            )

            # Treat a missing or null "value" as "no hits" so that len()
            # below never receives None.
            results = response_data.get("value") or []

            took_ms = int((time.perf_counter() - started) * 1000)
            return RetrievalResponse(
                results=results,
                took_ms=took_ms,
                total_count=len(results),
            )
        except Exception as e:
            # logger.exception keeps the original message text and also
            # records the traceback; the error is still propagated.
            logger.exception("retrieve_doc_chunk_user_manual failed: %s", e)
            raise
|
||||
Reference in New Issue
Block a user