Files
AIRegulation-DocAnalysis/backend/app/domain/conversation/models.py
ash66 30c7bda389 Refactor document handling and update Milvus collection settings
- Removed multiple failed document entries from `documents.json`.
- Added a new document entry with updated metadata and changed the index name to `regulations_dense_1024_v2`.
- Updated architecture documentation to reflect changes in the Milvus collection name.
- Adjusted requirements by removing the sqlalchemy dependency.
- Modified test cases to align with new document structure and naming conventions.
- Introduced a new test file for Milvus vector index runtime recovery and error handling.
- Updated assertions in various test files to ensure compatibility with the new schema.
2026-05-26 20:21:31 +08:00

127 lines
3.5 KiB
Python

"""Define domain models for conversation."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
# Keep module behavior explicit so the backend flow stays easy to audit.
@dataclass(init=False)
class AnswerSource:
    """Represent answer source data with legacy aliases.

    The hand-written, keyword-only constructor accepts the current field
    names plus three legacy spellings: ``doc_name`` (fallback for
    ``doc_title``), ``content`` (fallback for ``text``) and ``page_number``
    (fallback for ``page_start``). The same legacy names stay readable and
    writable through properties. Any other unexpected keyword argument is
    accepted and discarded.
    """

    doc_id: str
    doc_title: str
    chunk_id: str
    chunk_type: str
    section_title: str
    page_start: int
    page_end: int
    section_level: int
    chunk_index: int
    piece_index: int
    score: float
    text: str
    metadata: dict[str, Any] = field(default_factory=dict)

    def __init__(
        self,
        *,
        doc_id: str,
        doc_title: str | None = None,
        chunk_id: str,
        chunk_type: str = "",
        section_title: str = "",
        page_start: int = 0,
        page_end: int = 0,
        section_level: int = 0,
        chunk_index: int = 0,
        piece_index: int = 0,
        score: float = 0.0,
        text: str | None = None,
        metadata: dict[str, Any] | None = None,
        doc_name: str | None = None,
        content: str | None = None,
        page_number: int | None = None,
        **_: Any,
    ) -> None:
        """Initialize the answer source while accepting legacy field names.

        Numeric arguments are normalised with ``int``/``float``; a falsy
        value (``None``, ``0``, ``""``) becomes zero. ``page_end`` falls back
        to the resolved ``page_start`` when it is falsy. ``metadata`` is
        shallow-copied so the instance never shares the caller's dict.
        """
        self.doc_id = doc_id
        # An explicit doc_title (even "") wins; doc_name is used only for None.
        self.doc_title = doc_title if doc_title is not None else (doc_name or "")
        self.chunk_id = chunk_id
        self.chunk_type = chunk_type
        self.section_title = section_title
        self.page_start = int(page_start or page_number or 0)
        self.page_end = int(page_end or self.page_start)
        self.section_level = int(section_level or 0)
        self.chunk_index = int(chunk_index or 0)
        self.piece_index = int(piece_index or 0)
        # Tolerate a missing/None score exactly like the integer fields above
        # instead of raising TypeError from float(None).
        self.score = float(score or 0.0)
        # An explicit text (even "") wins; content is used only for None.
        self.text = text if text is not None else (content or "")
        self.metadata = dict(metadata or {})

    @property
    def doc_name(self) -> str:
        """Return the legacy document name alias."""
        return self.doc_title

    @doc_name.setter
    def doc_name(self, value: str) -> None:
        """Update the legacy document name alias."""
        self.doc_title = value

    @property
    def content(self) -> str:
        """Return the legacy content alias."""
        return self.text

    @content.setter
    def content(self, value: str) -> None:
        """Update the legacy content alias."""
        self.text = value

    @property
    def page_number(self) -> int:
        """Return the legacy page number alias."""
        return self.page_start

    @page_number.setter
    def page_number(self, value: int) -> None:
        """Update the legacy page number alias.

        ``page_end`` is raised to ``value`` when it would otherwise fall
        below the new start page; it is never lowered.
        """
        self.page_start = value
        self.page_end = max(self.page_end, value)
@dataclass
class ConversationMessage:
    """Hold a single message exchanged within a conversation.

    Attributes:
        role: Label of the message author.
        content: Message body.
        timestamp: Integer time marker for the message (unit is not fixed
            by this module -- confirm against the producer).
        sources: Source payloads attached to the message as plain dicts;
            each instance gets its own empty list by default.
    """

    # Required fields.
    role: str
    content: str
    timestamp: int

    # Optional field; the factory avoids sharing one list across instances.
    sources: list[dict[str, Any]] = field(default_factory=list)
@dataclass
class ConversationSession:
    """Hold a conversation session and the messages recorded in it.

    Attributes:
        session_id: Identifier of the session.
        messages: Messages belonging to the session; a fresh empty list
            per instance by default.
        created_at: Integer creation marker, ``0`` when unset.
        updated_at: Integer last-update marker, ``0`` when unset.
        metadata: Free-form extra data; a fresh empty dict per instance
            by default.
    """

    # Required field.
    session_id: str

    # Optional fields; mutable defaults come from factories so instances
    # never share state.
    messages: list[ConversationMessage] = field(default_factory=list)
    created_at: int = 0
    updated_at: int = 0
    metadata: dict[str, Any] = field(default_factory=dict)
@dataclass
class AnswerResult:
    """Hold the outcome of answering a question.

    Attributes:
        answer: The answer text.
        sources: Sources attached to the answer; a fresh empty list per
            instance by default.
        model: Model label, empty string when unset.
        latency_ms: Integer latency figure, ``0`` when unset.
        retrieved_count: Integer count of retrieved items, ``0`` when unset.
        context_tokens: Integer context token figure, ``0`` when unset.
        truncated: Truncation flag, ``False`` by default.
        error: Error description, or ``None`` when absent.
    """

    # Required field.
    answer: str

    # Optional fields with defaults.
    sources: list[AnswerSource] = field(default_factory=list)
    model: str = ""
    latency_ms: int = 0
    retrieved_count: int = 0
    context_tokens: int = 0
    truncated: bool = False
    error: str | None = None