feat: optimize WebUI stream output and sanitize user-facing answers

2026-03-13 13:14:37 +08:00
parent 8dc5354fa4
commit 33c357a1de
8 changed files with 228 additions and 45 deletions
--- a/internal/agent/orchestrator.go
+++ b/internal/agent/orchestrator.go
@@ -358,16 +358,17 @@ func (o *Orchestrator) buildUnifiedSystemPrompt(userInput string, routedSkills [
 		"",
 		"===== ReAct 思考指引 =====",
 		"你采用 ReAct（Reasoning + Acting）模式进行任务处理。",
-		"1. 思考优先：在做出任何行动之前，先在回复中阐述你的推理过程（Thought）。",
+		"1. 思考优先：在做出任何行动之前，先完成内部推理，但不要把 Thought、trace、step 暴露给用户。",
 		"2. 工具调用：如果需要获取信息或执行操作，使用提供的工具函数（function calling）进行调用。",
 		"3. 观察反馈：检查工具返回的结果，据此决定下一步行动。",
-		"4. 最终回答：当你有足够信息时，直接给出面向用户的最终文本回复，不要调用工具。",
+		"4. 最终回答：当你有足够信息时，只输出面向用户的最终文本回复，不要附带推理轨迹，不要调用工具。",
 		"",
 		"注意事项：",
 		"- 每次要么调用工具，要么给出最终回答，不要两者都做。",
 		"- 如果工具调用失败，根据错误信息（Traceback）调整策略后重试或给出替代方案。",
 		"- 涉及文件、目录、命令时，优先调用工具获取真实结果，不要猜测。",
-		"- 你的思考过程（Thought）应写在回复内容中，帮助追踪推理逻辑。",
+		"- 如果本轮需要调用工具，可以在 assistant content 中写简短内部推理，供系统记录日志；这些内容不会直接展示给用户。",
+		"- 最终用户可见内容中禁止出现 Thought、Trace、Step、Observation、Action、ActionInput 等字段或标题。",
 		"",
 		"===== 运行环境 =====",
 		runtimeDoc,
@@ -556,7 +557,7 @@ func (o *Orchestrator) runUnifiedReAct(ctx context.Context, chatID, userID, comp
 	const maxSteps = 20
 	for step := 1; step <= maxSteps; step++ {
 		if o.log != nil {
-			o.log.Infof("%s react step=%d start messages_count=%d", traceLogPrefix, step, len(messages))
+			o.log.Debugf("%s react step=%d start messages_count=%d", traceLogPrefix, step, len(messages))
 		}

 		// 调用 LLM（传入完整 messages + tools 定义）
@@ -566,7 +567,7 @@ func (o *Orchestrator) runUnifiedReAct(ctx context.Context, chatID, userID, comp
 		}

 		if o.log != nil {
-			o.log.Infof("%s react step=%d content_len=%d tool_calls=%d",
+			o.log.Debugf("%s react step=%d content_len=%d tool_calls=%d",
 				traceLogPrefix, step, len(completion.Content), len(completion.ToolCalls))
 			if completion.Content != "" {
 				o.log.Debugf("%s react step=%d thought=%q", traceLogPrefix, step, completion.Content)
@@ -575,12 +576,12 @@ func (o *Orchestrator) runUnifiedReAct(ctx context.Context, chatID, userID, comp

 		// ========== 无 tool_calls → 最终回答 ==========
 		if len(completion.ToolCalls) == 0 {
-			finalText := strings.TrimSpace(completion.Content)
+			finalText := sanitizeUserFacingAnswer(completion.Content)
 			if finalText == "" {
 				finalText = "已完成处理。"
 			}
 			if o.log != nil {
-				o.log.Infof("%s react final at step=%d answer_len=%d", traceLogPrefix, step, len(finalText))
+				o.log.Debugf("%s react final at step=%d answer_len=%d", traceLogPrefix, step, len(finalText))
 			}
 			return finalText, nil
 		}
@@ -614,7 +615,7 @@ func (o *Orchestrator) runUnifiedReAct(ctx context.Context, chatID, userID, comp
 			}

 			if o.log != nil {
-				o.log.Infof("%s react step=%d tool_call tool=%s input_len=%d", traceLogPrefix, step, toolName, len(toolInput))
+				o.log.Debugf("%s react step=%d tool_call tool=%s input_len=%d", traceLogPrefix, step, toolName, len(toolInput))
 				o.log.Debugf("%s react step=%d tool=%s input=%q", traceLogPrefix, step, toolName, toolInput)
 			}

@@ -633,7 +634,7 @@ func (o *Orchestrator) runUnifiedReAct(ctx context.Context, chatID, userID, comp
 			}

 			if o.log != nil {
-				o.log.Infof("%s react step=%d tool=%s observation_len=%d", traceLogPrefix, step, toolName, len(obs))
+				o.log.Debugf("%s react step=%d tool=%s observation_len=%d", traceLogPrefix, step, toolName, len(obs))
 				o.log.Debugf("%s react step=%d tool=%s observation=%q", traceLogPrefix, step, toolName, truncateForLog(obs, 500))
 			}

@@ -711,7 +712,7 @@ func (o *Orchestrator) runUnifiedReActStream(ctx context.Context, chatID, userID
 	const maxSteps = 20
 	for step := 1; step <= maxSteps; step++ {
 		if o.log != nil {
-			o.log.Infof("%s react stream step=%d start messages_count=%d", traceLogPrefix, step, len(messages))
+			o.log.Debugf("%s react stream step=%d start messages_count=%d", traceLogPrefix, step, len(messages))
 		}

 		// 调用 LLM
@@ -721,7 +722,7 @@ func (o *Orchestrator) runUnifiedReActStream(ctx context.Context, chatID, userID
 		}

 		if o.log != nil {
-			o.log.Infof("%s react stream step=%d content_len=%d tool_calls=%d",
+			o.log.Debugf("%s react stream step=%d content_len=%d tool_calls=%d",
 				traceLogPrefix, step, len(completion.Content), len(completion.ToolCalls))
 			if completion.Content != "" {
 				o.log.Debugf("%s react stream step=%d thought=%q", traceLogPrefix, step, completion.Content)
@@ -741,12 +742,12 @@ func (o *Orchestrator) runUnifiedReActStream(ctx context.Context, chatID, userID

 		// ========== 无 tool_calls → 最终回答 ==========
 		if len(completion.ToolCalls) == 0 {
-			finalText := strings.TrimSpace(completion.Content)
+			finalText := sanitizeUserFacingAnswer(completion.Content)
 			if finalText == "" {
 				finalText = "已完成处理。"
 			}
 			if o.log != nil {
-				o.log.Infof("%s react stream final at step=%d answer_len=%d", traceLogPrefix, step, len(finalText))
+				o.log.Debugf("%s react stream final at step=%d answer_len=%d", traceLogPrefix, step, len(finalText))
 			}
 			// 推送最终答案事件
 			if err := callback(StreamEvent{
@@ -808,7 +809,7 @@ func (o *Orchestrator) runUnifiedReActStream(ctx context.Context, chatID, userID
 			}

 			if o.log != nil {
-				o.log.Infof("%s react stream step=%d tool_call tool=%s input_len=%d", traceLogPrefix, step, toolName, len(toolInput))
+				o.log.Debugf("%s react stream step=%d tool_call tool=%s input_len=%d", traceLogPrefix, step, toolName, len(toolInput))
 				o.log.Debugf("%s react stream step=%d tool=%s input=%q", traceLogPrefix, step, toolName, toolInput)
 			}

@@ -827,7 +828,7 @@ func (o *Orchestrator) runUnifiedReActStream(ctx context.Context, chatID, userID
 			}

 			if o.log != nil {
-				o.log.Infof("%s react stream step=%d tool=%s observation_len=%d", traceLogPrefix, step, toolName, len(obs))
+				o.log.Debugf("%s react stream step=%d tool=%s observation_len=%d", traceLogPrefix, step, toolName, len(obs))
 				o.log.Debugf("%s react stream step=%d tool=%s observation=%q", traceLogPrefix, step, toolName, truncateForLog(obs, 500))
 			}

@@ -872,7 +873,7 @@ func (o *Orchestrator) runLegacyReAct(ctx context.Context, chatID, userID, compr

 	for step := 1; step <= maxSteps; step++ {
 		if o.log != nil {
-			o.log.Infof("%s legacy react step=%d start", traceLogPrefix, step)
+			o.log.Debugf("%s legacy react step=%d start", traceLogPrefix, step)
 		}

 		messages := buildReActMessages(systemPrompt, compressedContext, userInput, scratchpad)
@@ -890,10 +891,10 @@ func (o *Orchestrator) runLegacyReAct(ctx context.Context, chatID, userID, compr
 		if decision.IsFinalAnswer {
 			finalText := ""
 			if decision.FinalAnswer != nil {
-				finalText = strings.TrimSpace(*decision.FinalAnswer)
+				finalText = sanitizeUserFacingAnswer(*decision.FinalAnswer)
 			}
 			if finalText == "" {
-				finalText = strings.TrimSpace(decision.Thought)
+				finalText = sanitizeUserFacingAnswer(decision.Thought)
 			}
 			if finalText == "" {
 				finalText = "已完成处理。"
@@ -1518,3 +1519,52 @@ func truncateForLog(s string, maxLen int) string {
 	}
 	return s[:maxLen] + "...(truncated)"
 }
+
+// sanitizeUserFacingAnswer strips ReAct scaffolding from raw model output before it is shown to the user.
+// Text after the last final-answer marker wins; otherwise Thought/Action/Observation/Step lines are dropped
+// and blank runs collapse, while indentation and fenced code blocks (e.g. a YAML "action:" key) are kept.
+func sanitizeUserFacingAnswer(raw string) string {
+	raw = strings.TrimSpace(strings.ReplaceAll(raw, "\r\n", "\n"))
+	if raw == "" {
+		return ""
+	}
+
+	markers := []string{"Final Answer:", "Final Answer：", "最终回答:", "最终回答：", "最终答案:", "最终答案：", "Answer:", "Answer："}
+	for _, marker := range markers {
+		if idx := strings.LastIndex(raw, marker); idx >= 0 {
+			if candidate := strings.TrimSpace(raw[idx+len(marker):]); candidate != "" {
+				return candidate
+			}
+		}
+	}
+
+	// Trace labels match case-insensitively, followed by an ASCII or a full-width colon.
+	labels := []string{"thought", "trace", "observation", "action", "actioninput", "action input", "思考", "推理", "观察", "行动"}
+	isTrace := func(line string) bool {
+		lower := strings.ToLower(line)
+		for _, label := range labels {
+			if strings.HasPrefix(lower, label+":") || strings.HasPrefix(lower, label+"：") {
+				return true
+			}
+		}
+		return strings.HasPrefix(lower, "step ") || strings.HasPrefix(line, "步骤 ")
+	}
+	var cleaned []string
+	inFence := false
+	for _, line := range strings.Split(raw, "\n") {
+		trimmed := strings.TrimSpace(line)
+		isFence := strings.HasPrefix(trimmed, "```")
+		switch {
+		case trimmed == "" && !inFence:
+			if len(cleaned) > 0 && cleaned[len(cleaned)-1] != "" {
+				cleaned = append(cleaned, "")
+			}
+		case inFence || isFence || !isTrace(trimmed):
+			cleaned = append(cleaned, line) // untrimmed: indentation is part of the answer
+		}
+		if isFence {
+			inFence = !inFence
+		}
+	}
+	return strings.TrimSpace(strings.Join(cleaned, "\n"))
+}
--- a/internal/agent/orchestrator_skill_selection_test.go
+++ b/internal/agent/orchestrator_skill_selection_test.go
@@ -93,3 +93,32 @@ func TestMatchSkillsByNameEmpty(t *testing.T) {
 		t.Fatalf("expected 0 matches, got %d", len(matched))
 	}
 }
+
+// TestSanitizeUserFacingAnswerExtractsFinalAnswer checks that only the text after "Final Answer:" is returned.
+func TestSanitizeUserFacingAnswerExtractsFinalAnswer(t *testing.T) {
+	input := "Thought: 先分析用户问题\nObservation: 已经有足够信息\nFinal Answer: 这是给用户的结果"
+	if got := sanitizeUserFacingAnswer(input); got != "这是给用户的结果" {
+		t.Fatalf("expected final answer only, got %q", got)
+	}
+}
+
+// TestSanitizeUserFacingAnswerDropsTraceLines checks that Step/Action/Observation lines are filtered out.
+func TestSanitizeUserFacingAnswerDropsTraceLines(t *testing.T) {
+	replyLines := []string{
+		"Step 1 Thought: 检查上下文",
+		"Action: shell",
+		"Observation: ok",
+		"请执行以下变更。",
+	}
+	if got := sanitizeUserFacingAnswer(strings.Join(replyLines, "\n")); got != "请执行以下变更。" {
+		t.Fatalf("expected user-facing text only, got %q", got)
+	}
+}
+
+// TestSanitizeUserFacingAnswerKeepsNormalAnswer checks that a plain numbered answer passes through unchanged.
+func TestSanitizeUserFacingAnswerKeepsNormalAnswer(t *testing.T) {
+	const plain = "1. 先打开配置文件\n2. 修改端口后重启服务"
+	if got := sanitizeUserFacingAnswer(plain); got != plain {
+		t.Fatalf("expected answer unchanged, got %q", got)
+	}
+}
--- a/internal/llm/client.go
+++ b/internal/llm/client.go
@@ -81,9 +81,10 @@ type InputFile struct {
 }

 type OpenAICompatibleClient struct {
-	client openai.Client
-	model  string
-	log    *logger.Logger
+	client               openai.Client
+	model                string
+	disableThinkingParam bool
+	log                  *logger.Logger
 }

 func NewOpenAICompatibleClient(cfg config.LLMConfig, log *logger.Logger) *OpenAICompatibleClient {
@@ -95,9 +96,10 @@ func NewOpenAICompatibleClient(cfg config.LLMConfig, log *logger.Logger) *OpenAI
 		opts = append(opts, option.WithBaseURL(cfg.BaseURL))
 	}
 	return &OpenAICompatibleClient{
-		client: openai.NewClient(opts...),
-		model:  cfg.Model,
-		log:    log,
+		client:               openai.NewClient(opts...),
+		model:                cfg.Model,
+		disableThinkingParam: shouldDisableThinkingParam(cfg.BaseURL),
+		log:                  log,
 	}
 }

@@ -138,7 +140,7 @@ func (c *OpenAICompatibleClient) GenerateWithTools(ctx context.Context, messages
 		}
 	}

-	resp, err := c.client.Chat.Completions.New(ctx, params)
+	resp, err := c.client.Chat.Completions.New(ctx, params, c.chatCompletionRequestOptions()...)
 	if err != nil {
 		return nil, fmt.Errorf("llm tool-call request failed: %w", err)
 	}
@@ -180,7 +182,7 @@ func (c *OpenAICompatibleClient) generateWithMessagesInternal(ctx context.Contex
 		Messages: sdkMessages,
 	}

-	resp, err := c.client.Chat.Completions.New(ctx, params)
+	resp, err := c.client.Chat.Completions.New(ctx, params, c.chatCompletionRequestOptions()...)
 	if err != nil {
 		if c.log != nil {
 			c.log.Errorf("llm request failed err=%v", err)
@@ -392,3 +394,18 @@ func appendIfMissing(items []string, value string) []string {
 	}
 	return append(items, value)
 }
+
+func (c *OpenAICompatibleClient) chatCompletionRequestOptions() []option.RequestOption {
+	if c.disableThinkingParam { // flag is derived from the base URL when the client is constructed
+		return []option.RequestOption{option.WithJSONSet("enable_thinking", false)}
+	}
+	return nil // no per-request overrides for any other endpoint
+}
+
+// shouldDisableThinkingParam reports whether baseURL targets an Alibaba Cloud DashScope endpoint, for which
+// requests must carry enable_thinking=false. Empty or whitespace-only input yields false.
+func shouldDisableThinkingParam(baseURL string) bool {
+	normalized := strings.ToLower(strings.TrimSpace(baseURL))
+	// Test the two host parts separately so regional hosts such as dashscope-intl.aliyuncs.com also match.
+	return strings.Contains(normalized, "dashscope") && strings.Contains(normalized, ".aliyuncs.com")
+}
--- a/internal/llm/client_test.go
+++ b/internal/llm/client_test.go
@@ -0,0 +1,24 @@
+package llm
+
+import "testing"
+
+func TestShouldDisableThinkingParam(t *testing.T) {
+	if matched := shouldDisableThinkingParam("https://dashscope.aliyuncs.com/compatible-mode/v1"); !matched { // DashScope
+		t.Fatal("expected DashScope base URL to require enable_thinking=false")
+	}
+	if matched := shouldDisableThinkingParam("https://api.openai.com/v1"); matched { // stock OpenAI endpoint
+		t.Fatal("expected standard OpenAI base URL not to require enable_thinking=false")
+	}
+}
+
+// TestChatCompletionRequestOptions checks that exactly one option is produced only when the flag is set.
+func TestChatCompletionRequestOptions(t *testing.T) {
+	flagged := &OpenAICompatibleClient{disableThinkingParam: true}
+	if n := len(flagged.chatCompletionRequestOptions()); n != 1 {
+		t.Fatalf("expected 1 request option when disableThinkingParam=true, got %d", n)
+	}
+	unflagged := &OpenAICompatibleClient{}
+	if n := len(unflagged.chatCompletionRequestOptions()); n != 0 {
+		t.Fatalf("expected 0 request options when disableThinkingParam=false, got %d", n)
+	}
+}