2026-05-18 16:32:42 +08:00
|
|
|
"""Define domain models for retrieval."""
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
from dataclasses import dataclass, field
|
|
|
|
|
from typing import Any
|
|
|
|
|
# Keep module behavior explicit so the backend flow stays easy to audit.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class RetrievalQuery:
    """Describe a single retrieval request.

    ``query`` and ``top_k`` are required, in that positional order;
    ``filters`` is optional and defaults to ``None``.
    """

    # Query text for the request.
    query: str
    # Result-count parameter; presumably the maximum number of chunks to
    # return -- confirm against the retriever that consumes this model.
    top_k: int
    # Optional filter string; ``None`` when the caller supplies no filter.
    filters: str | None = field(default=None)
|
|
|
|
|
|
|
|
|
|
|
2026-05-26 20:21:31 +08:00
|
|
|
@dataclass(init=False)
class RetrievedChunk:
    """Represent the retrieved chunk payload with legacy aliases.

    The canonical field names are ``doc_title``, ``text``, ``page_start`` and
    ``chunk_type``.  The constructor additionally accepts the legacy keyword
    names ``doc_name``, ``content``, ``page_number`` and ``block_type``, and
    each of those names is also exposed as a read/write property that
    forwards to the canonical field.
    """

    chunk_id: str
    doc_id: str
    doc_title: str
    text: str
    score: float
    chunk_type: str = ""
    section_title: str = ""
    page_start: int = 0
    page_end: int = 0
    section_level: int = 0
    chunk_index: int = 0
    piece_index: int = 0
    metadata: dict[str, Any] = field(default_factory=dict)

    def __init__(
        self,
        *,
        chunk_id: str,
        doc_id: str,
        doc_title: str | None = None,
        text: str | None = None,
        score: float = 0.0,
        chunk_type: str = "",
        section_title: str = "",
        page_start: int = 0,
        page_end: int = 0,
        section_level: int = 0,
        chunk_index: int = 0,
        piece_index: int = 0,
        metadata: dict[str, Any] | None = None,
        doc_name: str | None = None,
        content: str | None = None,
        page_number: int | None = None,
        block_type: str | None = None,
        **_: Any,
    ) -> None:
        """Initialize the retrieved chunk while accepting legacy field names.

        All arguments are keyword-only.  When both a canonical name and its
        legacy alias are supplied, the canonical value wins.  Unknown keyword
        arguments are accepted and discarded.

        Args:
            chunk_id: Identifier of the chunk.
            doc_id: Identifier of the source document.
            doc_title: Document title; falls back to ``doc_name`` when None.
            text: Chunk text; falls back to ``content`` when None.
            score: Retrieval score; ``None`` or another falsy value becomes
                ``0.0``.
            chunk_type: Chunk type; falls back to ``block_type`` when empty.
            section_title: Title of the enclosing section.
            page_start: First page; falls back to ``page_number`` when falsy.
            page_end: Last page; falls back to the resolved ``page_start``
                when falsy.
            section_level: Section level; falsy values become 0.
            chunk_index: Chunk index; falsy values become 0.
            piece_index: Piece index; falsy values become 0.
            metadata: Extra metadata; shallow-copied so the caller's mapping
                is not shared with the instance.
            doc_name: Legacy alias for ``doc_title``.
            content: Legacy alias for ``text``.
            page_number: Legacy alias for ``page_start``.
            block_type: Legacy alias for ``chunk_type``.
        """
        self.chunk_id = chunk_id
        self.doc_id = doc_id
        # An explicit empty string for the canonical name is respected; only
        # None triggers the legacy fallback.
        self.doc_title = doc_title if doc_title is not None else (doc_name or "")
        self.text = text if text is not None else (content or "")
        # Tolerate score=None the same way the integer fields below tolerate
        # None, instead of raising TypeError from float(None).
        self.score = float(score or 0.0)
        self.chunk_type = chunk_type or (block_type or "")
        self.section_title = section_title
        self.page_start = int(page_start or page_number or 0)
        # A missing end page collapses the range to a single page.
        self.page_end = int(page_end or self.page_start)
        self.section_level = int(section_level or 0)
        self.chunk_index = int(chunk_index or 0)
        self.piece_index = int(piece_index or 0)
        self.metadata = dict(metadata or {})

    @property
    def doc_name(self) -> str:
        """Return the legacy document name alias."""
        return self.doc_title

    @doc_name.setter
    def doc_name(self, value: str) -> None:
        """Update the legacy document name alias."""
        self.doc_title = value

    @property
    def content(self) -> str:
        """Return the legacy content alias."""
        return self.text

    @content.setter
    def content(self, value: str) -> None:
        """Update the legacy content alias."""
        self.text = value

    @property
    def page_number(self) -> int:
        """Return the legacy page number alias."""
        return self.page_start

    @page_number.setter
    def page_number(self, value: int) -> None:
        """Update the legacy page number alias.

        Sets ``page_start`` to ``value`` and raises ``page_end`` to at least
        ``value``; ``page_end`` is never lowered by this setter.
        """
        self.page_start = value
        self.page_end = max(self.page_end, value)

    @property
    def block_type(self) -> str:
        """Return the legacy block type alias."""
        return self.chunk_type

    @block_type.setter
    def block_type(self, value: str) -> None:
        """Update the legacy block type alias."""
        self.chunk_type = value
|