"""Implement infrastructure support for vector chunk builder."""

from __future__ import annotations

from app.domain.documents import Chunk, ChunkBuilder, ParsedDocument


# Keep adapter behavior explicit so integration details remain easy to audit.
class AliyunVectorChunkBuilder(ChunkBuilder):
    """Turn the vector chunks of a parsed document into ``Chunk`` objects."""

    def build(
        self,
        *,
        parsed_document: ParsedDocument,
        regulation_type: str,
        version: str,
    ) -> list[Chunk]:
        """Build one ``Chunk`` per usable entry of ``parsed_document.vector_chunks``.

        Each entry is read through ``.get()`` / ``.items()``, so it is expected
        to be mapping-like. Entries whose embedding text is empty or
        whitespace-only are skipped.

        Args:
            parsed_document: Source of ``vector_chunks``, ``doc_id`` and
                ``doc_name``.
            regulation_type: Copied unchanged onto every produced chunk.
            version: Copied unchanged onto every produced chunk.

        Returns:
            The chunks in the same order as the source entries.
        """
        # Keys that are carried as dedicated Chunk fields and therefore left
        # out of the pass-through metadata.
        excluded_keys = frozenset(("content", "embedding_text"))

        built: list[Chunk] = []
        for position, raw in enumerate(parsed_document.vector_chunks):
            # Body text: "content" wins, then "text", then empty string.
            body = raw.get("content") or raw.get("text") or ""
            # Embedding text falls back to the body when absent or empty.
            text_for_embedding = raw.get("embedding_text") or body
            if not text_for_embedding.strip():
                continue

            path = raw.get("section_path") or []
            title = raw.get("section_title")
            if not title:
                # Fall back to the innermost section of the path, if any.
                title = path[-1] if path else ""

            page = raw.get("page_start") or raw.get("page") or 0

            identifier = raw.get("chunk_id")
            if not identifier:
                # Synthesize an id from the document id and entry position.
                identifier = f"{parsed_document.doc_id}-chunk-{position}"

            extra = {
                key: value
                for key, value in raw.items()
                if key not in excluded_keys
            }

            chunk = Chunk(
                chunk_id=str(identifier),
                doc_id=parsed_document.doc_id,
                doc_name=parsed_document.doc_name,
                content=body,
                embedding_text=text_for_embedding,
                section_title=title,
                section_path=path,
                page_number=int(page),
                regulation_type=regulation_type,
                version=version,
                semantic_id=raw.get("semantic_id", ""),
                block_type=raw.get("block_type", ""),
                metadata=extra,
            )
            built.append(chunk)

        return built