feat(bootstrap): refactor runtime dependency management and add lazy loading for binary store and vector index

feat(agent): update import for agent session service
feat(openai): add context truncation check in OpenAI answer generator
docs(README): update frontend environment file conventions
fix(vite): default local frontend development to local backend
2026-05-25 13:58:48 +08:00
parent 091a02c522
commit 10a034e294
11 changed files with 162 additions and 15 deletions
--- a/backend/app/infrastructure/llm/openai_compatible_answer_generator.py
+++ b/backend/app/infrastructure/llm/openai_compatible_answer_generator.py
@@ -67,6 +67,21 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator):
        )
        return messages, context_tokens

+    def _is_context_truncated(self, *, retrieved_chunks: list[RetrievedChunk], context_tokens: int) -> bool:
+        """Return True when the summed token estimate of every formatted chunk exceeds ``context_tokens`` (taken as a sign that chunks were omitted)."""
+        if not retrieved_chunks:
+            return False  # nothing was retrieved, so nothing can have been left out
+        estimated_total_tokens = sum(
+            self._estimate_tokens(  # NOTE(review): template presumably mirrors the prompt builder's per-chunk format -- confirm
+                f"[{idx}] 文档: {chunk.doc_name}\n"
+                f"章节: {chunk.section_title or '未标注'}\n"
+                f"页码: {chunk.page_number}\n"
+                f"内容: {chunk.content}"
+            )
+            for idx, chunk in enumerate(retrieved_chunks, start=1)  # idx is 1-based
+        )
+        return estimated_total_tokens > context_tokens  # strict comparison: an exact fit is not reported as truncated
+
    def _sources(self, chunks: list[RetrievedChunk]) -> list[AnswerSource]:
        """Handle sources for this module for the Open A I Compatible Answer Generator instance."""
        return [
@@ -111,7 +126,10 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator):
            latency_ms=latency_ms,
            retrieved_count=len(retrieved_chunks),
            context_tokens=context_tokens,
-            truncated=len(retrieved_chunks) > len(messages),
+            truncated=self._is_context_truncated(
+                retrieved_chunks=retrieved_chunks,
+                context_tokens=context_tokens,
+            ),
            error=response.error,
        )