59 lines
1.7 KiB
Python
59 lines
1.7 KiB
Python
|
|
import logging
|
||
|
|
|
||
|
|
|
||
|
|
import time
|
||
|
|
|
||
|
|
from ..config import get_config
|
||
|
|
from service.retrieval.clients import AzureSearchClient
|
||
|
|
from service.retrieval.model import RetrievalResponse
|
||
|
|
|
||
|
|
|
||
|
|
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
|
||
|
|
class GenericChunkRetrieval:
    """Retrieve document chunks from the configured user-manual search index.

    The instance holds the object returned by ``get_config()`` and an
    ``AzureSearchClient`` that performs the actual search requests.
    """

    def __init__(self) -> None:
        """Load the configuration and create the search client."""
        self.config = get_config()
        self.search_client = AzureSearchClient()

    async def retrieve_doc_chunk(
        self,
        query: str,
        conversation_history: str = "",
        **kwargs
    ) -> RetrievalResponse:
        """Search CATOnline system user manual document chunks.

        Args:
            query: Text forwarded to the search client as ``search_text``.
            conversation_history: Accepted for interface compatibility;
                this method does not read it.
            **kwargs: Optional overrides. Only ``top_k`` (default ``10``)
                and ``score_threshold`` (default ``1.5``) are read; both
                are forwarded to the search client. Other keys are ignored.

        Returns:
            A ``RetrievalResponse`` built from the list stored under the
            ``"value"`` key of the search response (empty list when the key
            is absent), the elapsed time in whole milliseconds, and the
            number of returned entries.

        Raises:
            Exception: Any error raised by the search call or while
                building the response is logged and re-raised unchanged.
        """
        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump when the system wall clock is adjusted.
        started = time.perf_counter()

        # Use the new Azure AI Search approach
        index_name = self.config.retrieval.index.chunk_user_manual_index
        vector_fields = "contentVector"
        select_fields = "content, title, full_headers"
        search_fields = "content, title, full_headers"

        top_k = kwargs.get("top_k", 10)
        score_threshold = kwargs.get("score_threshold", 1.5)

        try:
            response_data = await self.search_client.search_azure_ai(
                index_name=index_name,
                search_text=query,
                vector_fields=vector_fields,
                select_fields=select_fields,
                search_fields=search_fields,
                top_k=top_k,
                score_threshold=score_threshold,
            )

            results = response_data.get("value", [])

            took_ms = int((time.perf_counter() - started) * 1000)
            return RetrievalResponse(
                results=results,
                took_ms=took_ms,
                total_count=len(results),
            )
        except Exception as exc:
            # logger.exception logs at ERROR level like before, but also
            # records the traceback; the message text itself is unchanged.
            logger.exception("retrieve_doc_chunk_user_manual failed: %s", exc)
            raise
|