shell: support Windows cmd /C; normalize date/time; allow all commands; add tests

2026-03-05 17:44:19 +08:00
parent 47b6059773
commit e2f806edb3
19 changed files with 989 additions and 350 deletions
--- a/internal/agent/orchestrator.go
+++ b/internal/agent/orchestrator.go
@@ -2,7 +2,6 @@ package agent

 import (
 	"context"
-	"encoding/json"
 	"fmt"
 	"sort"
 	"strconv"
@@ -17,28 +16,32 @@ import (
 	"laodingbot/internal/tools"
 )

+// Orchestrator coordinates LLM calls, conversation context, skills, and tool invocation.
 type Orchestrator struct {
-	llm          llm.Client
-	store        *memory.SQLiteStore
-	tools        *tools.Registry
-	soul         string
-	skills       []knowledge.Skill
-	skillsDir    string
-	autoSkillDir string
+	llm                  llm.Client
+	store                *memory.SQLiteStore
+	tools                *tools.Registry
+	soul                 string
+	skills               []knowledge.Skill
+	skillSummaries       []knowledge.SkillSummary
+	skillsDir            string
+	autoSkillDir         string
 	gapDraftTriggerCount int
 	gapLookbackDuration  time.Duration
-	reactMaxStep int
-	enableCapabilityGap bool
-	log          *logger.Logger
-	skillsMu     sync.RWMutex
+	reactMaxStep         int // configured step limit; runUnifiedReAct currently applies its own fixed cap of 20
+	enableCapabilityGap  bool
+	log                  *logger.Logger
+	skillsMu             sync.RWMutex // guards skills and skillSummaries during hot reload
 }

+// NewOrchestrator builds an Orchestrator, applying defaults for non-positive limits and falling back to skillsDir when autoSkillDir is blank.
 func NewOrchestrator(
 	llmClient llm.Client,
 	store *memory.SQLiteStore,
 	registry *tools.Registry,
 	soul string,
 	skills []knowledge.Skill,
+	skillSummaries []knowledge.SkillSummary,
 	skillsDir string,
 	reactMaxStep int,
 	enableCapabilityGap bool,
@@ -48,34 +51,41 @@ func NewOrchestrator(
 	log *logger.Logger,
 ) *Orchestrator {
 	if reactMaxStep <= 0 {
-		reactMaxStep = 4
+		reactMaxStep = 8 // 默认最大 ReAct 步骤数为 8
 	}
 	if gapDraftTriggerCount <= 0 {
-		gapDraftTriggerCount = 3
+		gapDraftTriggerCount = 3 // 默认触发技能生成的缺口数量为 3
 	}
 	if gapLookbackDuration <= 0 {
-		gapLookbackDuration = 7 * 24 * time.Hour
+		gapLookbackDuration = 7 * 24 * time.Hour // 默认回溯时长为 7 天
 	}
 	if strings.TrimSpace(autoSkillDir) == "" {
 		autoSkillDir = skillsDir
 	}
 	return &Orchestrator{
-		llm:          llmClient,
-		store:        store,
-		tools:        registry,
-		soul:         soul,
-		skills:       skills,
-		skillsDir:    skillsDir,
-		autoSkillDir: autoSkillDir,
+		llm:                  llmClient,
+		store:                store,
+		tools:                registry,
+		soul:                 soul,
+		skills:               skills,
+		skillSummaries:       copySkillSummaries(skillSummaries),
+		skillsDir:            skillsDir,
+		autoSkillDir:         autoSkillDir,
 		gapDraftTriggerCount: gapDraftTriggerCount,
 		gapLookbackDuration:  gapLookbackDuration,
-		reactMaxStep: reactMaxStep,
-		enableCapabilityGap: enableCapabilityGap,
-		log:          log,
+		reactMaxStep:         reactMaxStep,
+		enableCapabilityGap:  enableCapabilityGap,
+		log:                  log,
 	}
 }

+// HandleMessage 是接受用户消息输入并通过统一 ReAct 循环生成回复的主流程。
+// 不再分"先选 skill 再决策"两步，而是 LLM 第一次调用就同时决定：
+// - 是否可以直接回答（is_final_answer=true）
+// - 是否需要调用工具（action + action_input）
+// 循环持续进行，直到 LLM 返回 is_final_answer=true。
 func (o *Orchestrator) HandleMessage(ctx context.Context, chatID, userID, text string) (string, error) {
+	// 为链路追踪设置唯一的 TraceID
 	traceID := logger.NewTraceID()
 	ctx = logger.WithTraceID(ctx, traceID)
 	traceLogPrefix := "trace_id=" + traceID
@@ -83,12 +93,16 @@ func (o *Orchestrator) HandleMessage(ctx context.Context, chatID, userID, text s
 		o.log.Infof("%s handle message chat_id=%s user_id=%s text_len=%d", traceLogPrefix, chatID, userID, len(text))
 		o.log.Debugf("%s handle message text=%q", traceLogPrefix, text)
 	}
+
+	// 处理特殊的重载指令
 	if strings.EqualFold(strings.TrimSpace(text), "/reload_skills") {
 		if err := o.ReloadSkills(); err != nil {
 			return "技能热加载失败: " + err.Error(), nil
 		}
 		return "技能已热加载完成。", nil
 	}
+
+	// 如果用户请求能力缺口报告，则生成报告格式化输出
 	if strings.EqualFold(strings.TrimSpace(text), "/capability_gaps") {
 		report, err := o.BuildCapabilityGapReport(10)
 		if err != nil {
@@ -96,6 +110,8 @@ func (o *Orchestrator) HandleMessage(ctx context.Context, chatID, userID, text s
 		}
 		return report, nil
 	}
+
+	// 保存用户消息到 SQLite 中
 	if err := o.store.SaveMessage(chatID, userID, "user", text); err != nil {
 		if o.log != nil {
 			o.log.Errorf("%s save user message failed chat_id=%s err=%v", traceLogPrefix, chatID, err)
@@ -103,6 +119,7 @@ func (o *Orchestrator) HandleMessage(ctx context.Context, chatID, userID, text s
 		return "", err
 	}

+	// 读取最近的会话记忆并压缩成 Prompt 上下文
 	recent, err := o.store.LoadRecent(chatID, 16)
 	if err != nil {
 		if o.log != nil {
@@ -115,35 +132,8 @@ func (o *Orchestrator) HandleMessage(ctx context.Context, chatID, userID, text s
 		o.log.Debugf("%s prompt context prepared chat_id=%s recent_count=%d compressed_len=%d", traceLogPrefix, chatID, len(recent), len(compressed))
 	}

-	matchedSkills := o.matchSkills(ctx, compressed, text)
-	if len(matchedSkills) == 0 {
-		if bootstrap, ok := o.findSkillByKeyword("创建skill", "skill builder", "skill 创建", "构建技能"); ok {
-			matchedSkills = []knowledge.Skill{bootstrap}
-			if o.log != nil {
-				o.log.Infof("%s fallback bootstrap skill selected name=%s", traceLogPrefix, bootstrap.Name)
-			}
-		}
-	}
-
-	var response string
-	if len(matchedSkills) == 0 {
-		if o.log != nil {
-			o.log.Infof("%s no skill matched; use direct llm chat_id=%s", traceLogPrefix, chatID)
-		}
-		o.emitCapabilityGap(chatID, userID, text, "no_skill_matched")
-		response, err = o.runDirectLLM(ctx, compressed, text)
-	} else {
-		if o.log != nil {
-			names := make([]string, 0, len(matchedSkills))
-			for _, s := range matchedSkills {
-				names = append(names, s.Name)
-				o.log.Infof("%s skill selected name=%s source=%s", traceLogPrefix, s.Name, s.Source)
-				o.log.Debugf("%s skill selected content name=%s content=%q", traceLogPrefix, s.Name, s.Content)
-			}
-			o.log.Infof("%s skills matched chat_id=%s skills=%s", traceLogPrefix, chatID, strings.Join(names, ","))
-		}
-		response, err = o.runReAct(ctx, chatID, userID, compressed, text, matchedSkills)
-	}
+	// 进入统一 ReAct 循环
+	response, err := o.runUnifiedReAct(ctx, chatID, userID, compressed, text)
 	if err != nil {
 		if o.log != nil {
 			o.log.Errorf("%s message generation failed chat_id=%s err=%v", traceLogPrefix, chatID, err)
@@ -151,86 +141,87 @@ func (o *Orchestrator) HandleMessage(ctx context.Context, chatID, userID, text s
 		return "", err
 	}

+	// 最终将机器人的回复也加入记忆缓存
 	if err := o.store.SaveMessage(chatID, userID, "assistant", response); err != nil {
 		if o.log != nil {
 			o.log.Errorf("%s save assistant response failed chat_id=%s err=%v", traceLogPrefix, chatID, err)
 		}
 		return "", err
 	}
+
 	if o.log != nil {
 		o.log.Infof("%s message handled chat_id=%s response_len=%d", traceLogPrefix, chatID, len(response))
 	}
 	return response, nil
 }

-func (o *Orchestrator) runDirectLLM(ctx context.Context, compressedContext, userInput string) (string, error) {
-	systemPrompt := strings.Join([]string{
-		"你是一个个人自动化助手，必须遵循如下人格设定并保持一致：",
-		o.soul,
-		"",
-		"如果当前问题没有匹配到已定义技能，请直接回答用户。",
-		"当你判断必须依赖外部工具结果才能可靠回答时，请明确告知用户需要进一步操作信息。",
-	}, "\n")
-
-	userPrompt := strings.Join([]string{
-		"历史上下文：",
-		compressedContext,
-		"",
-		"用户问题：",
-		userInput,
-	}, "\n")
-
-	return o.llm.Generate(ctx, systemPrompt, userPrompt)
-}
-
-type reactDecision struct {
-	Thought     string `json:"thought"`
-	Action      string `json:"action"`
-	ActionInput string `json:"action_input"`
-	Final       string `json:"final"`
-}
-
-func (o *Orchestrator) runReAct(ctx context.Context, chatID, userID, compressedContext, userInput string, selectedSkills []knowledge.Skill) (string, error) {
-	traceID := logger.TraceIDFromContext(ctx)
-	traceLogPrefix := "trace_id=" + traceID
-	selectedSkillsDoc := formatSkills(selectedSkills)
+// buildUnifiedSystemPrompt assembles the system prompt for the unified ReAct loop.
+// It joins the persona (soul), the skill overview, full skill content, the tool list, and the JSON output-format rules.
+func (o *Orchestrator) buildUnifiedSystemPrompt() string {
+	skillMetaDoc := o.formatSkillSummariesForPrompt()
+	allSkillsDoc := o.formatAllSkillsContent()
 	toolDoc := o.formatToolDoc()
-	if o.log != nil {
-		names := make([]string, 0, len(selectedSkills))
-		for _, s := range selectedSkills {
-			names = append(names, s.Name)
-		}
-		o.log.Infof("%s react start steps=%d skills=%s", traceLogPrefix, o.reactMaxStep, strings.Join(names, ","))
-		o.log.Debugf("%s react selected_skills_doc=%q", traceLogPrefix, selectedSkillsDoc)
-		o.log.Debugf("%s react tools_doc=%q", traceLogPrefix, toolDoc)
-	}

-	systemPrompt := strings.Join([]string{
+	return strings.Join([]string{
 		"你是一个个人自动化助手，必须遵循如下人格设定并保持一致：",
 		o.soul,
 		"",
-		"已匹配到的 skills（只可按下列技能执行）：",
-		selectedSkillsDoc,
+		"===== 可用技能概览 =====",
+		skillMetaDoc,
 		"",
-		"可用工具：",
+		"===== 技能详细说明 =====",
+		allSkillsDoc,
+		"",
+		"===== 可用工具 =====",
 		toolDoc,
 		"",
-		"你必须使用 ReAct 模式做决策。",
-		"只有当技能明确需要工具能力时才调用工具。",
-		"如果问题可直接回答，不要调用工具。",
-		"你的输出必须是 JSON，对象字段为 thought, action, action_input, final。",
-		"规则：",
-		"1) 当需要调工具时：final 置空，action 必须是可用工具之一，action_input 为工具输入。",
-		"2) 当可以最终回答时：action 置 none，action_input 置空，final 填最终回复。",
-		"3) 不要输出 JSON 之外内容。",
+		"===== 输出格式约束 =====",
+		"你必须使用 ReAct（Reasoning + Acting）模式进行决策。",
+		"每次回复必须是且仅是一个 JSON 对象，字段如下：",
+		"",
+		"{",
+		"  \"thought\": \"你的推理过程（必填）\",",
+		"  \"action\": \"要调用的工具名称，如 file/shell/web_search（不调工具时填 none）\",",
+		"  \"action_input\": \"传给工具的输入（字符串或对象），不调工具时填空字符串或 null\",",
+		"  \"is_final_answer\": true 或 false,",
+		"  \"final_answer\": \"当 is_final_answer=true 时填写给用户的最终回复，否则填 null\"",
+		"}",
+		"",
+		"决策规则：",
+		"1) 如果你可以直接回答用户问题（不需要任何工具）：",
+		"   设 is_final_answer=true，action=\"none\"，final_answer 填写完整回复。",
+		"2) 如果你需要调用工具获取信息后才能回答：",
+		"   设 is_final_answer=false，action 填工具名，action_input 填工具所需输入，final_answer=null。",
+		"3) 不要在 JSON 之外输出任何内容。",
+		"4) 根据技能说明中的指引决定何时以及如何使用工具。",
+		"5) 每轮工具调用结果会以 Observation 的形式追加到推理记录中，供你下一轮决策参考。",
 	}, "\n")
+}

+// runUnifiedReAct 执行统一的 ReAct 循环。
+// LLM 每次都看到完整的技能集+工具集，自行决定是否调用工具或直接回答。
+// 循环持续到 is_final_answer=true 或达到安全上限。
+func (o *Orchestrator) runUnifiedReAct(ctx context.Context, chatID, userID, compressedContext, userInput string) (string, error) {
+	traceID := logger.TraceIDFromContext(ctx)
+	traceLogPrefix := "trace_id=" + traceID
+
+	systemPrompt := o.buildUnifiedSystemPrompt()
+
+	if o.log != nil {
+		o.log.Infof("%s unified react start", traceLogPrefix)
+	}
+
+	// 安全上限：防止无限循环（当前暂不使用 reactMaxStep 配置约束，使用固定硬上限）
+	const maxSteps = 20
 	scratchpad := ""
-	for step := 1; step <= o.reactMaxStep; step++ {
+
+	for step := 1; step <= maxSteps; step++ {
 		if o.log != nil {
-			o.log.Infof("%s react step start step=%d/%d", traceLogPrefix, step, o.reactMaxStep)
-			o.log.Debugf("%s react scratchpad_before step=%d content=%q", traceLogPrefix, step, scratchpad)
+			o.log.Infof("%s react step=%d start", traceLogPrefix, step)
+			o.log.Debugf("%s react step=%d scratchpad=%q", traceLogPrefix, step, scratchpad)
 		}
+
+		// 构造本轮 user prompt：历史上下文 + 用户问题 + 推理记录
 		prompt := strings.Join([]string{
 			"历史上下文：",
 			compressedContext,
@@ -241,7 +232,7 @@ func (o *Orchestrator) runReAct(ctx context.Context, chatID, userID, compressedC
 			"当前推理记录（按时间顺序）：",
 			scratchpad,
 			"",
-			fmt.Sprintf("请输出下一步 JSON 决策。当前步骤: %d/%d", step, o.reactMaxStep),
+			"请输出你的 JSON 决策。",
 		}, "\n")

 		raw, err := o.llm.Generate(ctx, systemPrompt, prompt)
@@ -249,51 +240,72 @@ func (o *Orchestrator) runReAct(ctx context.Context, chatID, userID, compressedC
 			return "", err
 		}
 		if o.log != nil {
-			o.log.Infof("%s react step llm output step=%d raw=%q", traceLogPrefix, step, raw)
+			o.log.Infof("%s react step=%d llm_raw=%q", traceLogPrefix, step, raw)
 		}
+
+		// 解析 LLM 返回的 JSON 决策
 		decision, err := parseDecision(raw)
 		if err != nil {
 			if o.log != nil {
-				o.log.Warnf("%s react parse failed, fallback to direct llm err=%v", traceLogPrefix, err)
+				o.log.Warnf("%s react step=%d parse failed err=%v, using raw as final answer", traceLogPrefix, step, err)
 			}
+			// 解析失败时，尝试将原始输出当作直接回答返回
 			o.emitCapabilityGap(chatID, userID, userInput, "react_parse_failed")
-			return o.runDirectLLM(ctx, compressedContext, userInput)
+			return strings.TrimSpace(raw), nil
 		}
+
 		if o.log != nil {
-			o.log.Infof("%s react step decision step=%d thought=%q action=%q action_input=%q final=%q", traceLogPrefix, step, decision.Thought, decision.Action, decision.ActionInput, decision.Final)
+			o.log.Infof("%s react step=%d thought=%q action=%q is_final=%v",
+				traceLogPrefix, step, decision.Thought, decision.Action, decision.IsFinalAnswer)
 		}

-		action := strings.ToLower(strings.TrimSpace(decision.Action))
-		if action == "" {
-			action = "none"
-		}
-
-		if action == "none" {
-			finalText := strings.TrimSpace(decision.Final)
+		// ========== 判定：是否为最终回答 ==========
+		if decision.IsFinalAnswer {
+			finalText := ""
+			if decision.FinalAnswer != nil {
+				finalText = strings.TrimSpace(*decision.FinalAnswer)
+			}
 			if finalText == "" {
-				finalText = "我已完成思考，但当前没有足够信息给出稳定结论。"
+				finalText = strings.TrimSpace(decision.Thought)
+			}
+			if finalText == "" {
+				finalText = "已完成处理。"
 			}
 			if o.log != nil {
-				o.log.Infof("%s react final step=%d final=%q", traceLogPrefix, step, finalText)
+				o.log.Infof("%s react final at step=%d answer=%q", traceLogPrefix, step, finalText)
 			}
 			return finalText, nil
 		}

+		// ========== 非最终回答：执行工具调用 ==========
+		action := strings.ToLower(strings.TrimSpace(decision.Action))
+		if action == "" || action == "none" {
+			// LLM 说不是最终回答但也不指定工具，记录后让它再想一轮
+			scratchpad += "Step " + strconv.Itoa(step) + " Thought: " + decision.Thought + "\n"
+			scratchpad += "Step " + strconv.Itoa(step) + " Observation: 你没有指定要调用的工具，请重新决策：要么调用工具，要么给出最终回答。\n"
+			continue
+		}
+
+		actionInput := decision.GetActionInputString()
+
+		// 检查工具是否存在
 		tool, ok := o.tools.Get(action)
 		if !ok {
 			if o.log != nil {
-				o.log.Warnf("%s react step tool missing step=%d tool=%s", traceLogPrefix, step, action)
+				o.log.Warnf("%s react step=%d tool_not_found=%s", traceLogPrefix, step, action)
 			}
 			scratchpad += "Step " + strconv.Itoa(step) + " Thought: " + decision.Thought + "\n"
-			scratchpad += "Step " + strconv.Itoa(step) + " Observation: " + formatToolErrorObservation("TOOL_NOT_FOUND", action, "tool not found") + "\n"
+			scratchpad += "Step " + strconv.Itoa(step) + " Action: " + action + "\n"
+			scratchpad += "Step " + strconv.Itoa(step) + " Observation: " + formatToolErrorObservation("TOOL_NOT_FOUND", action, "该工具不存在，可用工具请参阅 system prompt") + "\n"
 			o.emitCapabilityGap(chatID, userID, userInput, "tool_not_found:"+action)
 			continue
 		}

-		toolOut, toolErr := tool.Call(ctx, decision.ActionInput)
+		// 调用工具
 		if o.log != nil {
-			o.log.Infof("%s react step tool call step=%d tool=%s input=%q", traceLogPrefix, step, action, decision.ActionInput)
+			o.log.Infof("%s react step=%d tool_call tool=%s input=%q", traceLogPrefix, step, action, actionInput)
 		}
+		toolOut, toolErr := tool.Call(ctx, actionInput)
 		obs := strings.TrimSpace(toolOut)
 		if obs == "" {
 			obs = "(empty output)"
@@ -302,103 +314,37 @@ func (o *Orchestrator) runReAct(ctx context.Context, chatID, userID, compressedC
 			obs = formatToolErrorObservation("TOOL_EXEC_ERROR", action, toolErr.Error()) + "\nOUTPUT:\n" + obs
 			o.emitCapabilityGap(chatID, userID, userInput, "tool_call_failed:"+action)
 		}
+		// 限制观察值长度防止超出 LLM 上下文窗口
+		if len(obs) > 4000 {
+			obs = obs[:4000] + "\n...(truncated)"
+		}
+
 		if o.log != nil {
-			o.log.Infof("%s react step observation step=%d tool=%s observation=%q", traceLogPrefix, step, action, obs)
-		}
-		if len(obs) > 2000 {
-			obs = obs[:2000]
+			o.log.Infof("%s react step=%d observation_len=%d", traceLogPrefix, step, len(obs))
 		}
+
+		// 将本轮的思考、行动、观察追加到 scratchpad
 		scratchpad += "Step " + strconv.Itoa(step) + " Thought: " + decision.Thought + "\n"
 		scratchpad += "Step " + strconv.Itoa(step) + " Action: " + action + "\n"
-		scratchpad += "Step " + strconv.Itoa(step) + " ActionInput: " + decision.ActionInput + "\n"
+		scratchpad += "Step " + strconv.Itoa(step) + " ActionInput: " + actionInput + "\n"
 		scratchpad += "Step " + strconv.Itoa(step) + " Observation: " + obs + "\n"
 	}

+	// 达到安全上限仍未得到最终回答
 	o.emitCapabilityGap(chatID, userID, userInput, "react_step_exhausted")
-	return "我尝试了多轮思考与工具调用，但仍未得到稳定结论。请给我更具体的约束或允许我继续尝试。", nil
+	return "我尝试了多轮推理与工具调用，但仍未得到稳定结论。请给我更具体的约束或允许我继续尝试。", nil
 }

-func (o *Orchestrator) matchSkills(ctx context.Context, compressedContext, userInput string) []knowledge.Skill {
+// formatAllSkillsContent renders every loaded skill via formatSkills for the system prompt, or "(none)" when no skills are loaded.
+func (o *Orchestrator) formatAllSkillsContent() string {
 	skills := o.getSkillsSnapshot()
 	if len(skills) == 0 {
-		return nil
+		return "(none)"
 	}
-
-	type skillChoice struct {
-		Skills []string `json:"skills"`
-	}
-
-	systemPrompt := strings.Join([]string{
-		"你是技能路由器。",
-		"任务：根据用户问题，从候选技能中选择 0-2 个最相关技能名称。",
-		"输出必须是 JSON：{\"skills\":[\"name1\",\"name2\"]}",
-		"如果没有匹配技能，返回 {\"skills\":[]}。",
-		"不要输出 JSON 之外内容。",
-	}, "\n")
-
-	userPrompt := strings.Join([]string{
-		"候选技能：",
-		formatSkillCatalog(skills),
-		"",
-		"历史上下文：",
-		compressedContext,
-		"",
-		"用户问题：",
-		userInput,
-	}, "\n")
-
-	raw, err := o.llm.Generate(ctx, systemPrompt, userPrompt)
-	if err != nil {
-		if o.log != nil {
-			o.log.Warnf("skill match llm failed err=%v", err)
-		}
-		return nil
-	}
-	if o.log != nil {
-		o.log.Infof("skill router output raw=%q", raw)
-	}
-
-	raw = normalizeJSON(raw)
-	choice := skillChoice{}
-	if err := json.Unmarshal([]byte(raw), &choice); err != nil {
-		if o.log != nil {
-			o.log.Warnf("skill match parse failed err=%v", err)
-		}
-		return nil
-	}
-
-	picked := make([]knowledge.Skill, 0, 2)
-	seen := map[string]struct{}{}
-	for _, name := range choice.Skills {
-		name = strings.TrimSpace(strings.ToLower(name))
-		if name == "" {
-			continue
-		}
-		if _, ok := seen[name]; ok {
-			continue
-		}
-		for _, skill := range skills {
-			if strings.ToLower(strings.TrimSpace(skill.Name)) == name {
-				picked = append(picked, skill)
-				seen[name] = struct{}{}
-				break
-			}
-		}
-		if len(picked) >= 2 {
-			break
-		}
-	}
-	if o.log != nil {
-		names := make([]string, 0, len(picked))
-		for _, s := range picked {
-			names = append(names, s.Name)
-		}
-		o.log.Infof("skill router selected skills=%s", strings.Join(names, ","))
-	}
-
-	return picked
+	return formatSkills(skills)
 }

+// emitCapabilityGap records a capability gap and, once a gap cluster reaches gapDraftTriggerCount, generates a skill draft and reloads skills.
 func (o *Orchestrator) emitCapabilityGap(chatID, userID, intent, reason string) {
 	if !o.enableCapabilityGap {
 		return
@@ -409,16 +355,17 @@ func (o *Orchestrator) emitCapabilityGap(chatID, userID, intent, reason string)
 		return
 	}
 	if len(intent) > 1000 {
-		intent = intent[:1000]
+		intent = intent[:1000] // 防止恶意使用超长 payload
 	}
 	if len(reason) > 240 {
-		reason = reason[:240]
+		reason = reason[:240] // 保证状态长度在 DB 内正常可用
 	}
 	if err := o.store.SaveCapabilityGap(chatID, userID, intent, reason); err != nil && o.log != nil {
 		o.log.Warnf("save capability gap failed chat_id=%s user_id=%s err=%v", chatID, userID, err)
 		return
 	}

+	// 提取出高频率缺口并在超出阈值后进行 draft 生成
 	clusters, err := o.store.TopCapabilityGapClusters(20, time.Now().UTC().Add(-o.gapLookbackDuration))
 	if err != nil {
 		if o.log != nil {
@@ -430,6 +377,7 @@ func (o *Orchestrator) emitCapabilityGap(chatID, userID, intent, reason string)
 		if c.Count < o.gapDraftTriggerCount {
 			continue
 		}
+
 		path, created, draftErr := knowledge.GenerateSkillDraft(c, o.autoSkillDir)
 		if draftErr != nil {
 			if o.log != nil {
@@ -440,6 +388,7 @@ func (o *Orchestrator) emitCapabilityGap(chatID, userID, intent, reason string)
 		if created && o.log != nil {
 			o.log.Infof("capability gap draft generated path=%s intent_key=%s reason=%s count=%d", path, c.IntentKey, c.Reason, c.Count)
 		}
+		// 如果生成了新技能则将它们重新加载进环境
 		if created {
 			if reloadErr := o.ReloadSkills(); reloadErr != nil && o.log != nil {
 				o.log.Warnf("auto reload skills failed after generation path=%s err=%v", path, reloadErr)
@@ -448,13 +397,20 @@ func (o *Orchestrator) emitCapabilityGap(chatID, userID, intent, reason string)
 	}
 }

+// ReloadSkills re-reads skill definitions and summaries from skillsDir and swaps them in under skillsMu, without a restart.
 func (o *Orchestrator) ReloadSkills() error {
 	skills, err := knowledge.LoadSkillSet(o.skillsDir)
 	if err != nil {
 		return err
 	}
+	summaries, err := knowledge.LoadSkillSummaries(o.skillsDir)
+	if err != nil {
+		return err
+	}
+	// 利用 RWMutex 做热更新保护
 	o.skillsMu.Lock()
 	o.skills = skills
+	o.skillSummaries = copySkillSummaries(summaries)
 	o.skillsMu.Unlock()
 	if o.log != nil {
 		o.log.Infof("skills hot reloaded count=%d dir=%s", len(skills), o.skillsDir)
@@ -470,6 +426,13 @@ func (o *Orchestrator) getSkillsSnapshot() []knowledge.Skill {
 	return out
 }

+func (o *Orchestrator) getSkillSummariesSnapshot() []knowledge.SkillSummary {
+	o.skillsMu.RLock() // read lock: ReloadSkills replaces skillSummaries under the write lock
+	defer o.skillsMu.RUnlock()
+	return copySkillSummaries(o.skillSummaries) // hand out a copy so callers may sort it without touching shared state
+}
+
+// BuildCapabilityGapReport formats up to limit of the top capability-gap clusters within the lookback window as a text report.
 func (o *Orchestrator) BuildCapabilityGapReport(limit int) (string, error) {
 	clusters, err := o.store.TopCapabilityGapClusters(limit, time.Now().UTC().Add(-o.gapLookbackDuration))
 	if err != nil {
@@ -490,25 +453,55 @@ func (o *Orchestrator) BuildCapabilityGapReport(limit int) (string, error) {
 	return b.String(), nil
 }

-func (o *Orchestrator) findSkillByKeyword(keywords ...string) (knowledge.Skill, bool) {
-	if len(keywords) == 0 {
-		return knowledge.Skill{}, false
+func (o *Orchestrator) formatSkillSummariesForPrompt() string { // one "- [dir] name => desc" bullet per summary, or "(none)"
+	summaries := o.getSkillSummariesSnapshot()
+	if len(summaries) == 0 {
+		return "(none)"
 	}
-	skills := o.getSkillsSnapshot()
-	for _, s := range skills {
-		name := strings.ToLower(strings.TrimSpace(s.Name))
-		content := strings.ToLower(strings.TrimSpace(s.Content))
-		for _, kw := range keywords {
-			kw = strings.ToLower(strings.TrimSpace(kw))
-			if kw == "" {
-				continue
-			}
-			if strings.Contains(name, kw) || strings.Contains(content, kw) {
-				return s, true
-			}
+	sort.Slice(summaries, func(i, j int) bool { // order by DirName, then Name, case-insensitively
+		left := strings.ToLower(strings.TrimSpace(summaries[i].DirName))
+		right := strings.ToLower(strings.TrimSpace(summaries[j].DirName))
+		if left == right {
+			return strings.ToLower(strings.TrimSpace(summaries[i].Name)) < strings.ToLower(strings.TrimSpace(summaries[j].Name))
 		}
+		return left < right
+	})
+	b := strings.Builder{}
+	for _, summary := range summaries {
+		dir := strings.TrimSpace(summary.DirName)
+		name := strings.TrimSpace(summary.Name)
+		desc := strings.TrimSpace(summary.Description)
+		if name == "" { // summaries without a name are left out of the prompt
+			continue
+		}
+		if len(desc) > 220 { // the limit is in bytes, not runes
+			desc = strings.ToValidUTF8(desc[:220], "") // drop a multi-byte rune cut in half by the byte limit
+		}
+		b.WriteString("- ")
+		if dir != "" {
+			b.WriteString("[")
+			b.WriteString(dir)
+			b.WriteString("] ")
+		}
+		b.WriteString(name)
+		if desc != "" {
+			b.WriteString(" => ")
+			b.WriteString(desc)
+		}
+		b.WriteString("\n")
 	}
-	return knowledge.Skill{}, false
+	return strings.TrimSpace(b.String())
+}
+
+func copySkillSummaries(in []knowledge.SkillSummary) []knowledge.SkillSummary {
+	out := make([]knowledge.SkillSummary, len(in)) // fresh backing array, so the result never aliases in
+	copy(out, in)
+	for i := range out { // trim surrounding whitespace on the text fields of each copy
+		out[i].DirName = strings.TrimSpace(out[i].DirName)
+		out[i].Name = strings.TrimSpace(out[i].Name)
+		out[i].Description = strings.TrimSpace(out[i].Description)
+	}
+	return out
 }

 func formatToolErrorObservation(code, action, reason string) string {
@@ -539,25 +532,6 @@ func formatSkills(skills []knowledge.Skill) string {
 	return strings.TrimSpace(b.String())
 }

-func formatSkillCatalog(skills []knowledge.Skill) string {
-	b := strings.Builder{}
-	for _, skill := range skills {
-		summary := strings.ReplaceAll(skill.Content, "\n", " ")
-		summary = strings.TrimSpace(summary)
-		if len(summary) > 220 {
-			summary = summary[:220]
-		}
-		b.WriteString("- ")
-		b.WriteString(skill.Name)
-		if summary != "" {
-			b.WriteString(": ")
-			b.WriteString(summary)
-		}
-		b.WriteString("\n")
-	}
-	return strings.TrimSpace(b.String())
-}
-
 func (o *Orchestrator) formatToolDoc() string {
 	list := o.tools.List()
 	if len(list) == 0 {
--- a/internal/agent/react_parser.go
+++ b/internal/agent/react_parser.go
@@ -6,6 +6,36 @@ import (
 	"strings"
 )

+// reactDecision is the structured JSON decision the LLM returns in the unified ReAct loop.
+// Every LLM call yields one; the agent uses it to decide whether to keep looping.
+type reactDecision struct {
+	// Thought is the LLM's description of its current reasoning.
+	Thought string `json:"thought"`
+	// Action is the name of the tool to call (e.g. "file", "shell", "web_search"); "none" or empty when no tool is needed.
+	Action string `json:"action"`
+	// ActionInput is the input passed to the tool; it may be a string or a structured object.
+	ActionInput json.RawMessage `json:"action_input"`
+	// IsFinalAnswer marks this round as the final answer; true ends the ReAct loop.
+	IsFinalAnswer bool `json:"is_final_answer"`
+	// FinalAnswer holds the reply for the user when IsFinalAnswer is true.
+	FinalAnswer *string `json:"final_answer"`
+}
+
+// GetActionInputString converts ActionInput to the string handed to a tool's Call method.
+// A JSON string is returned unquoted; an object or array is returned as its raw JSON text.
+func (d *reactDecision) GetActionInputString() string {
+	if len(d.ActionInput) == 0 {
+		return ""
+	}
+	// Try to decode it as a JSON string first.
+	var s string
+	if err := json.Unmarshal(d.ActionInput, &s); err == nil {
+		return s
+	}
+	// Not a string: return the raw JSON text, trimmed of surrounding whitespace.
+	return strings.TrimSpace(string(d.ActionInput))
+}
+
 func parseDecision(raw string) (reactDecision, error) {
 	raw = normalizeJSON(raw)
 	start := strings.Index(raw, "{")
--- a/internal/agent/react_parser_test.go
+++ b/internal/agent/react_parser_test.go
@@ -2,28 +2,70 @@ package agent

 import "testing"

-func TestParseDecisionPlainJSON(t *testing.T) {
-	raw := `{"thought":"t","action":"none","action_input":"","final":"ok"}`
+// TestParseDecisionFinalAnswer checks that final_answer is parsed correctly when is_final_answer=true.
+func TestParseDecisionFinalAnswer(t *testing.T) {
+	raw := `{"thought":"直接回答","action":"none","action_input":"","is_final_answer":true,"final_answer":"你好！"}`
 	got, err := parseDecision(raw)
 	if err != nil {
 		t.Fatalf("parseDecision error: %v", err)
 	}
-	if got.Action != "none" || got.Final != "ok" {
-		t.Fatalf("unexpected decision: %+v", got)
+	if !got.IsFinalAnswer {
+		t.Fatal("expected is_final_answer=true")
+	}
+	if got.FinalAnswer == nil || *got.FinalAnswer != "你好！" {
+		t.Fatalf("unexpected final_answer: %v", got.FinalAnswer) // NOTE(review): %v on a *string prints the pointer, not the text
 	}
 }

+// TestParseDecisionToolCall checks parsing of a decision that requests a tool call.
+func TestParseDecisionToolCall(t *testing.T) {
+	raw := `{"thought":"需要搜索","action":"web_search","action_input":"NVIDIA stock price","is_final_answer":false,"final_answer":null}`
+	got, err := parseDecision(raw)
+	if err != nil {
+		t.Fatalf("parseDecision error: %v", err)
+	}
+	if got.IsFinalAnswer {
+		t.Fatal("expected is_final_answer=false")
+	}
+	if got.Action != "web_search" {
+		t.Fatalf("expected action=web_search, got %s", got.Action)
+	}
+	input := got.GetActionInputString()
+	if input != "NVIDIA stock price" {
+		t.Fatalf("expected action_input string, got %q", input)
+	}
+}
+
+// TestParseDecisionStructuredActionInput checks parsing when action_input is a structured object.
+func TestParseDecisionStructuredActionInput(t *testing.T) {
+	raw := `{"thought":"搜索","action":"web_search","action_input":{"query":"test","context":"dev"},"is_final_answer":false,"final_answer":null}`
+	got, err := parseDecision(raw)
+	if err != nil {
+		t.Fatalf("parseDecision error: %v", err)
+	}
+	input := got.GetActionInputString()
+	if input == "" {
+		t.Fatal("expected non-empty action_input")
+	}
+	// A structured object should come back as its raw JSON text.
+	if input[0] != '{' {
+		t.Fatalf("expected JSON object string, got %q", input)
+	}
+}
+
+// TestParseDecisionCodeFence checks parsing of JSON wrapped in a markdown code fence.
 func TestParseDecisionCodeFence(t *testing.T) {
-	raw := "```json\n{\"thought\":\"t\",\"action\":\"shell\",\"action_input\":\"ls\",\"final\":\"\"}\n```"
+	raw := "```json\n{\"thought\":\"t\",\"action\":\"shell\",\"action_input\":\"ls\",\"is_final_answer\":false,\"final_answer\":null}\n```"
 	got, err := parseDecision(raw)
 	if err != nil {
 		t.Fatalf("parseDecision error: %v", err)
 	}
-	if got.Action != "shell" || got.ActionInput != "ls" {
-		t.Fatalf("unexpected decision: %+v", got)
+	if got.Action != "shell" {
+		t.Fatalf("unexpected action: %s", got.Action)
 	}
 }

+// TestParseDecisionInvalid 测试非 JSON 输入时返回错误
 func TestParseDecisionInvalid(t *testing.T) {
 	_, err := parseDecision("not json")
 	if err == nil {