feat(bootstrap): refactor runtime dependency management and add lazy loading for binary store and vector index

feat(agent): update import for agent session service
feat(openai): add context truncation check in OpenAI answer generator
docs(README): update frontend environment file conventions
fix(vite): default local frontend development to local backend
This commit is contained in:
ash66
2026-05-25 13:58:48 +08:00
parent 091a02c522
commit 10a034e294
11 changed files with 162 additions and 15 deletions

View File

@@ -67,6 +67,21 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator):
)
return messages, context_tokens
def _is_context_truncated(self, *, retrieved_chunks: list[RetrievedChunk], context_tokens: int) -> bool:
"""Return whether the prompt context had to omit retrieved chunks to fit the token budget."""
if not retrieved_chunks:
return False
estimated_total_tokens = sum(
self._estimate_tokens(
f"[{idx}] 文档: {chunk.doc_name}\n"
f"章节: {chunk.section_title or '未标注'}\n"
f"页码: {chunk.page_number}\n"
f"内容: {chunk.content}"
)
for idx, chunk in enumerate(retrieved_chunks, start=1)
)
return estimated_total_tokens > context_tokens
def _sources(self, chunks: list[RetrievedChunk]) -> list[AnswerSource]:
"""Handle sources for this module for the Open A I Compatible Answer Generator instance."""
return [
@@ -111,7 +126,10 @@ class OpenAICompatibleAnswerGenerator(AnswerGenerator):
latency_ms=latency_ms,
retrieved_count=len(retrieved_chunks),
context_tokens=context_tokens,
truncated=len(retrieved_chunks) > len(messages),
truncated=self._is_context_truncated(
retrieved_chunks=retrieved_chunks,
context_tokens=context_tokens,
),
error=response.error,
)