From 7d6cf6b4359e8c46d9e655471bf21684b8a92c6c Mon Sep 17 00:00:00 2001
From: "Ding, Shuo" <shuo.ding@t-systems.com>
Date: Sat, 28 Feb 2026 17:48:33 +0800
Subject: [PATCH] feat: add workspace-isolated toolhost runtime and
 capability-gap skill loop

---
 .vscode/launch.json                        |  38 +++
 cmd/bot/main.go                            |  63 ++++-
 configs/env.sample                         |  11 +-
 doc/技术说明文档.md                        | 196 +++++++++++++
 internal/agent/orchestrator.go             | 271 ++++++++++++++----
 internal/agent/react_parser.go             |  31 +++
 internal/agent/react_parser_test.go        |  32 +++
 internal/config/config.go                  | 214 +++++++++++++--
 internal/knowledge/drafts.go               | 141 ++++++++++
 internal/knowledge/drafts_test.go          |  37 +++
 internal/knowledge/loader.go               |  22 --
 internal/logger/trace.go                   |  32 +++
 internal/memory/store_sqlite.go            | 159 +++++++++++
 internal/memory/store_sqlite_test.go       |  64 +++++
 internal/memory/types.go                   |  20 ++
 internal/runtimews/bootstrap.go            | 109 ++++++++
 internal/toolhost/client.go                | 303 +++++++++++++++++++++
 internal/toolhost/protocol.go              |  37 +++
 internal/toolhost/remote_tool.go           |  36 +++
 internal/toolhost/runtime.go               |  45 +++
 internal/toolhost/server.go                | 106 +++++++
 internal/toolhost/stdio.go                 |  14 +
 internal/tools/filetool/filetool.go        |  83 +++++-
 internal/tools/filetool/filetool_test.go   |  66 +++++
 internal/tools/shelltool/shelltool.go      |  28 +-
 internal/tools/shelltool/shelltool_test.go |  23 ++
 skills/filesystem_query/skill.md           | 133 ++++++++-
 skills/skill_builder/skill.md              |  52 ++++
 28 files changed, 2223 insertions(+), 143 deletions(-)
 create mode 100644 .vscode/launch.json
 create mode 100644 doc/技术说明文档.md
 create mode 100644 internal/agent/react_parser.go
 create mode 100644 internal/agent/react_parser_test.go
 create mode 100644 internal/knowledge/drafts.go
 create mode 100644 internal/knowledge/drafts_test.go
 create mode 100644 internal/logger/trace.go
 create mode 100644 internal/memory/store_sqlite_test.go
 create mode 100644 internal/memory/types.go
 create mode 100644 internal/runtimews/bootstrap.go
 create mode 100644 internal/toolhost/client.go
 create mode 100644 internal/toolhost/protocol.go
 create mode 100644 internal/toolhost/remote_tool.go
 create mode 100644 internal/toolhost/runtime.go
 create mode 100644 internal/toolhost/server.go
 create mode 100644 internal/toolhost/stdio.go
 create mode 100644 internal/tools/filetool/filetool_test.go
 create mode 100644 internal/tools/shelltool/shelltool_test.go
 create mode 100644 skills/skill_builder/skill.md

diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..c5d955a
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,38 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "LaodingBot: Debug (from configs/env)",
+            "type": "go",
+            "request": "launch",
+            "mode": "auto",
+            "program": "${workspaceFolder}/cmd/bot",
+            "cwd": "${workspaceFolder}",
+            "envFile": "${workspaceFolder}/configs/env"
+        },
+        {
+            "name": "LaodingBot: Debug Telegram",
+            "type": "go",
+            "request": "launch",
+            "mode": "auto",
+            "program": "${workspaceFolder}/cmd/bot",
+            "cwd": "${workspaceFolder}",
+            "envFile": "${workspaceFolder}/configs/env",
+            "env": {
+                "MESSAGE_CHANNEL": "telegram"
+            }
+        },
+        {
+            "name": "LaodingBot: Debug Feishu",
+            "type": "go",
+            "request": "launch",
+            "mode": "auto",
+            "program": "${workspaceFolder}/cmd/bot",
+            "cwd": "${workspaceFolder}",
+            "envFile": "${workspaceFolder}/configs/env",
+            "env": {
+                "MESSAGE_CHANNEL": "feishu"
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/cmd/bot/main.go b/cmd/bot/main.go
index 9a75ec5..8ed8513 100644
--- a/cmd/bot/main.go
+++ b/cmd/bot/main.go
@@ -3,6 +3,7 @@ package main
 import (
 	"context"
 	"fmt"
+	"os"
 	"os/signal"
 	"syscall"
 	"time"
@@ -13,9 +14,9 @@ import (
 	"laodingbot/internal/llm"
 	"laodingbot/internal/logger"
 	"laodingbot/internal/memory"
+	"laodingbot/internal/runtimews"
+	"laodingbot/internal/toolhost"
 	"laodingbot/internal/tools"
-	"laodingbot/internal/tools/filetool"
-	"laodingbot/internal/tools/shelltool"
 	"laodingbot/internal/transport/feishu"
 	"laodingbot/internal/transport/telegram"
 )
@@ -23,18 +24,29 @@ import (
 func main() {
 	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
 	defer stop()
+	isToolhostChild := len(os.Args) > 1 && os.Args[1] == "--toolhost"
+	workspaceRoot, err := runtimews.PrepareFromEnv()
+	if err != nil {
+		panic(fmt.Sprintf("prepare runtime workspace failed: %v", err))
+	}
 
 	cfg, err := config.Load()
 	if err != nil {
 		panic(fmt.Sprintf("load config failed: %v", err))
 	}
+	if isToolhostChild {
+		if err := toolhost.RunChild(ctx, cfg, nil); err != nil && ctx.Err() == nil {
+			panic(fmt.Sprintf("toolhost child failed: %v", err))
+		}
+		return
+	}
 
 	appLogger, err := logger.New(cfg.LogLevel)
 	if err != nil {
 		panic(fmt.Sprintf("init logger failed: %v", err))
 	}
 	appLogger = appLogger.WithComponent("main")
-	appLogger.Infof("config loaded; channel=%s, log_level=%s", cfg.MessageChannel, cfg.LogLevel)
+	appLogger.Infof("config loaded; channel=%s, log_level=%s workspace=%s", cfg.MessageChannel, cfg.LogLevel, workspaceRoot)
 
 	store, err := memory.NewSQLiteStore(cfg.SQLitePath, appLogger.WithComponent("memory"))
 	if err != nil {
@@ -44,19 +56,44 @@ func main() {
 	defer store.Close()
 
 	toolRegistry := tools.NewRegistry(appLogger.WithComponent("tools.registry"))
-	toolRegistry.Register(filetool.New(cfg.Security.AllowedDirs, appLogger.WithComponent("tools.file")))
-	toolRegistry.Register(shelltool.New(cfg.Security.AllowedCommands, cfg.Security.WorkDir, 15*time.Second, appLogger.WithComponent("tools.shell")))
+	exePath, err := os.Executable()
+	if err != nil {
+		appLogger.Errorf("resolve executable path failed: %v", err)
+		panic(err)
+	}
+	tc, err := toolhost.NewClient(toolhost.ClientConfig{
+		ExecutablePath:    exePath,
+		Args:              []string{"--toolhost"},
+		WorkDir:           ".",
+		CallTimeout:       time.Duration(cfg.ToolCallTimeoutSec) * time.Second,
+		HeartbeatInterval: 5 * time.Second,
+		MaxConcurrency:    4,
+	}, appLogger.WithComponent("toolhost.client"))
+	if err != nil {
+		appLogger.Errorf("init toolhost client failed: %v", err)
+		panic(err)
+	}
+	defer tc.Close()
+
+	listCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
+	toolInfos, err := tc.ToolList(listCtx)
+	cancel()
+	if err != nil {
+		appLogger.Errorf("toolhost list failed: %v", err)
+		panic(err)
+	}
+	if len(toolInfos) == 0 {
+		panic("toolhost returned empty tool list")
+	}
+	for _, info := range toolInfos {
+		toolRegistry.Register(toolhost.NewRemoteTool(info.Name, info.Description, time.Duration(cfg.ToolCallTimeoutSec)*time.Second, tc))
+	}
 
 	soul, err := knowledge.LoadSoul(cfg.SoulPath)
 	if err != nil {
 		appLogger.Errorf("load soul failed path=%s err=%v", cfg.SoulPath, err)
 		panic(err)
 	}
-	skillsDoc, err := knowledge.LoadSkills(cfg.SkillsDir)
-	if err != nil {
-		appLogger.Errorf("load skills failed dir=%s err=%v", cfg.SkillsDir, err)
-		panic(err)
-	}
 	skillSet, err := knowledge.LoadSkillSet(cfg.SkillsDir)
 	if err != nil {
 		appLogger.Errorf("load skill set failed dir=%s err=%v", cfg.SkillsDir, err)
@@ -71,8 +108,12 @@ func main() {
 		toolRegistry,
 		soul,
 		skillSet,
-		skillsDoc,
+		cfg.SkillsDir,
 		cfg.ReactMaxSteps,
+		cfg.EnableCapabilityGap,
+		cfg.AutoSkillDir,
+		cfg.GapDraftTriggerCount,
+		time.Duration(cfg.GapClusterLookbackHours)*time.Hour,
 		appLogger.WithComponent("agent"),
 	)
 
diff --git a/configs/env.sample b/configs/env.sample
index e52a9e9..d08a190 100644
--- a/configs/env.sample
+++ b/configs/env.sample
@@ -1,8 +1,15 @@
 MESSAGE_CHANNEL=telegram
 LOG_LEVEL=info
+AGENT_WORKSPACE_DIR=./workspace/agent_runtime
 SOUL_PATH=./bot_context/soul.md
 SKILLS_DIR=./skills
 REACT_MAX_STEPS=4
+TOOL_CALL_TIMEOUT_SEC=15
+TOOL_OUTPUT_MAX_CHARS=4000
+ENABLE_CAPABILITY_GAP=true
+AUTO_SKILL_DIR=./skills
+GAP_DRAFT_TRIGGER_COUNT=3
+GAP_CLUSTER_LOOKBACK_HOURS=168
 
 TELEGRAM_BOT_TOKEN=
 TELEGRAM_POLL_TIMEOUT_SECONDS=30
@@ -16,6 +23,6 @@ LLM_API_KEY=
 LLM_MODEL=gpt-4o-mini
 
 SQLITE_PATH=./data/laodingbot.db
-ALLOWED_DIRS=./workspace,./data
-ALLOWED_COMMANDS=pwd,ls,cat,echo,grep,find,head,tail
+ALLOWED_DIRS=./workspace,./data,./skills
+ALLOWED_COMMANDS=pwd,ls,cat,echo,grep,find,head,tail,go,curl,curl.exe
 WORK_DIR=./workspace
diff --git a/doc/技术说明文档.md b/doc/技术说明文档.md
new file mode 100644
index 0000000..1ebc8cf
--- /dev/null
+++ b/doc/技术说明文档.md
@@ -0,0 +1,196 @@
+# LaodingBot 技术说明文档（2026-02-28 最新实现）
+
+> 本文档基于当前代码状态，描述真实可运行架构与能力边界。
+
+---
+
+## 1. 项目定位
+
+LaodingBot 当前已从“单进程工具调用 MVP”演进为：
+- **父进程 Agent 编排**（技能路由 + ReAct + 记忆）
+- **子进程 ToolHost 执行**（JSON-RPC）
+- **workspace 隔离运行空间**（配置与工具权限收敛）
+- **能力缺口闭环**（落库、聚类、自动生成技能并热加载）
+
+核心目标：让 Agent 在安全边界内持续补全能力，而不是仅做静态问答。
+
+---
+
+## 2. 目录与模块
+
+- `cmd/bot/main.go`：应用入口、workspace 引导、toolhost 启动、通道分发
+- `internal/config/config.go`：配置加载、workspace 路径解析、安全策略归一化
+- `internal/runtimews/bootstrap.go`：运行时 workspace 准备与种子目录复制
+- `internal/agent/orchestrator.go`：主编排器（技能匹配、ReAct、能力缺口闭环）
+- `internal/toolhost/*`：工具子进程协议、服务端、客户端、远程工具适配
+- `internal/tools/filetool/filetool.go`：文件工具（`read/list/write`）
+- `internal/tools/shelltool/shelltool.go`：命令工具（白名单 + 超时 + 输出限制）
+- `internal/memory/store_sqlite.go`：消息与能力缺口存储、聚类查询
+- `internal/knowledge/loader.go`：skill/soul 加载
+- `internal/knowledge/drafts.go`：能力缺口驱动的 skill 自动生成
+
+---
+
+## 3. 启动链路（当前）
+
+`main()` 执行顺序：
+1. 建立可取消上下文（SIGINT/SIGTERM）。
+2. 调用 `runtimews.PrepareFromEnv()`：
+   - 解析 `AGENT_WORKSPACE_DIR`（默认 `./workspace/agent_runtime`）
+   - 将 `configs`、`data`、`skills`、`bot_context` 四个种子目录复制到 runtime workspace（仅在目标缺失时复制）
+   - 设定 `CONFIG_ENV_FILE=<workspace>/configs/env`
+3. 调用 `config.Load()`，优先读取 workspace env。
+4. 若 `--toolhost` 模式，进入子进程服务。
+5. 正常父进程：初始化日志、SQLite、ToolHost Client、知识、Orchestrator。
+6. 根据 `MESSAGE_CHANNEL` 启动 Telegram 或 Feishu transport。
+
+---
+
+## 4. 配置加载与优先级（关键变更）
+
+`config.Load()` 的 env 读取优先级：
+1. `CONFIG_ENV_FILE`（强覆盖）
+2. `<workspace>/configs/env` 与 `<workspace>/.env`（强覆盖）
+3. 根目录 `configs/env` 与 `.env`（仅兜底，不覆盖已有值）
+
+这保证 VS Code Debug 场景下，**workspace 配置优先于根目录配置**。
+
+### 关键配置
+- `REACT_MAX_STEPS`：必须来自 env（无代码默认值）
+- `AGENT_WORKSPACE_DIR`：agent 运行空间根目录
+- `ALLOWED_DIRS` / `ALLOWED_COMMANDS` / `WORK_DIR`：工具安全边界
+- `AUTO_SKILL_DIR`：自动生成 skill 的目标目录（默认 workspace/skills）
+- `GAP_DRAFT_TRIGGER_COUNT` / `GAP_CLUSTER_LOOKBACK_HOURS`：缺口聚类触发参数
+
+---
+
+## 5. workspace 隔离策略
+
+当前实现中，Agent 与工具默认都在 workspace 内高权限运行：
+- 相对路径统一按 `AGENT_WORKSPACE_DIR` 解析
+- `ALLOWED_DIRS` 强制补齐：
+  - workspace 根
+  - `workspace/skills`
+  - `workspace/data`
+  - `workspace/workspace`
+- `ALLOWED_COMMANDS` 自动补齐：`go`、`curl`、`curl.exe`
+
+`filetool` 对相对路径优先按 workspace 根解析，避免写到代码仓库根目录。
+
+---
+
+## 6. ToolHost 子进程架构
+
+当前工具调用已迁移到 JSON-RPC 子进程：
+- 协议方法：`ping`、`tool.list`、`tool.call`
+- 父进程 `Client` 能力：
+  - 调用超时
+  - 心跳检测
+  - 失败重启与重试
+  - 并发限制（信号量）
+- 子进程 stdout 仅承载协议数据（避免日志污染 RPC）
+
+效果：工具崩溃不会直接拖垮 Agent 主编排逻辑。
+
+---
+
+## 7. ReAct 与技能路由
+
+`Orchestrator` 流程：
+1. 保存用户消息到 SQLite
+2. 读取最近对话并压缩
+3. LLM 进行技能路由（最多命中 2 个）
+4. 若无技能命中：按关键词（如 `创建skill`、`skill builder`）在技能名称/内容中查找引导技能作为回退；仍无命中则直接调用 LLM，并记录 `no_skill_matched` 缺口
+5. 进入 ReAct 多轮决策（`action/final`）
+6. 工具调用观察写入 scratchpad
+7. 保存 assistant 回复
+
+工具错误会结构化为：
+- `ERROR_CODE=...; TOOL=...; REASON=...`
+
+---
+
+## 8. 能力缺口闭环（已落地）
+
+当出现“不会做”信号（如无 skill 命中、ReAct 输出解析失败、工具不存在或调用失败、ReAct 步数耗尽）时：
+1. 写入 `capability_gaps` 表
+2. 进行意图归一化聚类（按 `intent_key + reason`）
+3. 高频达到阈值后自动生成 skill 文件
+4. 自动调用 `ReloadSkills()` 热加载
+
+可通过消息命令查看与控制：
+- `/capability_gaps`：输出当前高频缺口清单
+- `/reload_skills`：手动热加载 skills
+
+---
+
+## 9. 自动生成 skill 的当前行为
+
+自动生成由 `internal/knowledge/drafts.go` 执行：
+- 目标目录：`AUTO_SKILL_DIR`（默认 workspace/skills）
+- 命名：`auto_<intent_key>/skill.md`
+- 仅在文件不存在时创建，避免重复覆盖
+- 模板内包含：触发背景、执行流程、工具建议、测试建议
+
+并额外提供基础引导技能：
+- `skills/skill_builder/skill.md`
+
+---
+
+## 10. file/shell 工具现状
+
+### file tool
+支持：
+- `read <path>`
+- `list <path>`
+- `write <path>\n<content>`
+
+特性：
+- 白名单路径检查
+- 目录误读防护：`read` 目录返回 `PATH_IS_DIRECTORY`
+- 输出长度限制
+
+### shell tool
+特性：
+- 命令白名单（首 token）
+- 超时中断
+- 固定工作目录
+- 输出截断
+- Windows 不可执行命令友好报错
+
+---
+
+## 11. 数据存储
+
+SQLite 表：
+1. `messages`：对话消息
+2. `capability_gaps`：能力缺口事件
+
+提供查询：
+- 最近消息
+- 最近缺口事件
+- 高频缺口聚类（含计数与最近出现时间）
+
+---
+
+## 12. 与最初文档相比的变化
+
+当前代码已经完成并替代旧文档中的以下“待实现项”：
+- ToolHost 子进程隔离（已实现）
+- 能力缺口闭环（已实现）
+- 自动 skill 生成与热加载（已实现）
+- workspace 配置优先与运行空间隔离（已实现）
+
+仍属于持续演进项：
+- 新工具代码自动注册与生效的全自动化流水线
+- 更细粒度权限域（按 skill/tool 分级）
+- 更强的自动化验收（e2e + 故障注入）
+
+---
+
+## 13. 下一步建议
+
+1. 为 `toolhost client/server` 增加专项故障单测（心跳失败、子进程崩溃、并发压力）。
+2. 增加“自动生成 tool 后自动接线注册”的流水线模块。
+3. 为 skill 自动生成增加结构门禁（frontmatter/章节完整性校验）。
+4. 引入操作审计视图，串联 trace_id 与 capability_gap。
diff --git a/internal/agent/orchestrator.go b/internal/agent/orchestrator.go
index f55c6d1..92f8ef3 100644
--- a/internal/agent/orchestrator.go
+++ b/internal/agent/orchestrator.go
@@ -5,7 +5,10 @@ import (
 	"encoding/json"
 	"fmt"
 	"sort"
+	"strconv"
 	"strings"
+	"sync"
+	"time"
 
 	"laodingbot/internal/knowledge"
 	"laodingbot/internal/llm"
@@ -20,9 +23,14 @@ type Orchestrator struct {
 	tools        *tools.Registry
 	soul         string
 	skills       []knowledge.Skill
-	skillsDoc    string
+	skillsDir    string
+	autoSkillDir string
+	gapDraftTriggerCount int
+	gapLookbackDuration  time.Duration
 	reactMaxStep int
+	enableCapabilityGap bool
 	log          *logger.Logger
+	skillsMu     sync.RWMutex
 }
 
 func NewOrchestrator(
@@ -31,33 +39,66 @@ func NewOrchestrator(
 	registry *tools.Registry,
 	soul string,
 	skills []knowledge.Skill,
-	skillsDoc string,
+	skillsDir string,
 	reactMaxStep int,
+	enableCapabilityGap bool,
+	autoSkillDir string,
+	gapDraftTriggerCount int,
+	gapLookbackDuration time.Duration,
 	log *logger.Logger,
 ) *Orchestrator {
 	if reactMaxStep <= 0 {
 		reactMaxStep = 4
 	}
+	if gapDraftTriggerCount <= 0 {
+		gapDraftTriggerCount = 3
+	}
+	if gapLookbackDuration <= 0 {
+		gapLookbackDuration = 7 * 24 * time.Hour
+	}
+	if strings.TrimSpace(autoSkillDir) == "" {
+		autoSkillDir = skillsDir
+	}
 	return &Orchestrator{
 		llm:          llmClient,
 		store:        store,
 		tools:        registry,
 		soul:         soul,
 		skills:       skills,
-		skillsDoc:    skillsDoc,
+		skillsDir:    skillsDir,
+		autoSkillDir: autoSkillDir,
+		gapDraftTriggerCount: gapDraftTriggerCount,
+		gapLookbackDuration:  gapLookbackDuration,
 		reactMaxStep: reactMaxStep,
+		enableCapabilityGap: enableCapabilityGap,
 		log:          log,
 	}
 }
 
 func (o *Orchestrator) HandleMessage(ctx context.Context, chatID, userID, text string) (string, error) {
+	traceID := logger.NewTraceID()
+	ctx = logger.WithTraceID(ctx, traceID)
+	traceLogPrefix := "trace_id=" + traceID
 	if o.log != nil {
-		o.log.Infof("handle message chat_id=%s user_id=%s text_len=%d", chatID, userID, len(text))
-		o.log.Debugf("handle message text=%q", text)
+		o.log.Infof("%s handle message chat_id=%s user_id=%s text_len=%d", traceLogPrefix, chatID, userID, len(text))
+		o.log.Debugf("%s handle message text=%q", traceLogPrefix, text)
+	}
+	if strings.EqualFold(strings.TrimSpace(text), "/reload_skills") {
+		if err := o.ReloadSkills(); err != nil {
+			return "技能热加载失败: " + err.Error(), nil
+		}
+		return "技能已热加载完成。", nil
+	}
+	if strings.EqualFold(strings.TrimSpace(text), "/capability_gaps") {
+		report, err := o.BuildCapabilityGapReport(10)
+		if err != nil {
+			return "缺口报告生成失败: " + err.Error(), nil
+		}
+		return report, nil
 	}
 	if err := o.store.SaveMessage(chatID, userID, "user", text); err != nil {
 		if o.log != nil {
-			o.log.Errorf("save user message failed chat_id=%s err=%v", chatID, err)
+			o.log.Errorf("%s save user message failed chat_id=%s err=%v", traceLogPrefix, chatID, err)
 		}
 		return "", err
 	}
@@ -65,50 +106,59 @@ func (o *Orchestrator) HandleMessage(ctx context.Context, chatID, userID, text s
 	recent, err := o.store.LoadRecent(chatID, 16)
 	if err != nil {
 		if o.log != nil {
-			o.log.Errorf("load recent failed chat_id=%s err=%v", chatID, err)
+			o.log.Errorf("%s load recent failed chat_id=%s err=%v", traceLogPrefix, chatID, err)
 		}
 		return "", err
 	}
 	compressed := memory.CompressForPrompt(recent, 6000)
 	if o.log != nil {
-		o.log.Debugf("prompt context prepared chat_id=%s recent_count=%d compressed_len=%d", chatID, len(recent), len(compressed))
+		o.log.Debugf("%s prompt context prepared chat_id=%s recent_count=%d compressed_len=%d", traceLogPrefix, chatID, len(recent), len(compressed))
 	}
 
 	matchedSkills := o.matchSkills(ctx, compressed, text)
+	if len(matchedSkills) == 0 {
+		if bootstrap, ok := o.findSkillByKeyword("创建skill", "skill builder", "skill 创建", "构建技能"); ok {
+			matchedSkills = []knowledge.Skill{bootstrap}
+			if o.log != nil {
+				o.log.Infof("%s fallback bootstrap skill selected name=%s", traceLogPrefix, bootstrap.Name)
+			}
+		}
+	}
 
 	var response string
 	if len(matchedSkills) == 0 {
 		if o.log != nil {
-			o.log.Infof("no skill matched; use direct llm chat_id=%s", chatID)
+			o.log.Infof("%s no skill matched; use direct llm chat_id=%s", traceLogPrefix, chatID)
 		}
+		o.emitCapabilityGap(chatID, userID, text, "no_skill_matched")
 		response, err = o.runDirectLLM(ctx, compressed, text)
 	} else {
 		if o.log != nil {
 			names := make([]string, 0, len(matchedSkills))
 			for _, s := range matchedSkills {
 				names = append(names, s.Name)
-				o.log.Infof("skill selected name=%s source=%s", s.Name, s.Source)
-				o.log.Debugf("skill selected content name=%s content=%q", s.Name, s.Content)
+				o.log.Infof("%s skill selected name=%s source=%s", traceLogPrefix, s.Name, s.Source)
+				o.log.Debugf("%s skill selected content name=%s content=%q", traceLogPrefix, s.Name, s.Content)
 			}
-			o.log.Infof("skills matched chat_id=%s skills=%s", chatID, strings.Join(names, ","))
+			o.log.Infof("%s skills matched chat_id=%s skills=%s", traceLogPrefix, chatID, strings.Join(names, ","))
 		}
-		response, err = o.runReAct(ctx, compressed, text, matchedSkills)
+		response, err = o.runReAct(ctx, chatID, userID, compressed, text, matchedSkills)
 	}
 	if err != nil {
 		if o.log != nil {
-			o.log.Errorf("message generation failed chat_id=%s err=%v", chatID, err)
+			o.log.Errorf("%s message generation failed chat_id=%s err=%v", traceLogPrefix, chatID, err)
 		}
 		return "", err
 	}
 
 	if err := o.store.SaveMessage(chatID, userID, "assistant", response); err != nil {
 		if o.log != nil {
-			o.log.Errorf("save assistant response failed chat_id=%s err=%v", chatID, err)
+			o.log.Errorf("%s save assistant response failed chat_id=%s err=%v", traceLogPrefix, chatID, err)
 		}
 		return "", err
 	}
 	if o.log != nil {
-		o.log.Infof("message handled chat_id=%s response_len=%d", chatID, len(response))
+		o.log.Infof("%s message handled chat_id=%s response_len=%d", traceLogPrefix, chatID, len(response))
 	}
 	return response, nil
 }
@@ -140,7 +190,9 @@ type reactDecision struct {
 	Final       string `json:"final"`
 }
 
-func (o *Orchestrator) runReAct(ctx context.Context, compressedContext, userInput string, selectedSkills []knowledge.Skill) (string, error) {
+func (o *Orchestrator) runReAct(ctx context.Context, chatID, userID, compressedContext, userInput string, selectedSkills []knowledge.Skill) (string, error) {
+	traceID := logger.TraceIDFromContext(ctx)
+	traceLogPrefix := "trace_id=" + traceID
 	selectedSkillsDoc := formatSkills(selectedSkills)
 	toolDoc := o.formatToolDoc()
 	if o.log != nil {
@@ -148,9 +200,9 @@ func (o *Orchestrator) runReAct(ctx context.Context, compressedContext, userInpu
 		for _, s := range selectedSkills {
 			names = append(names, s.Name)
 		}
-		o.log.Infof("react start steps=%d skills=%s", o.reactMaxStep, strings.Join(names, ","))
-		o.log.Debugf("react selected_skills_doc=%q", selectedSkillsDoc)
-		o.log.Debugf("react tools_doc=%q", toolDoc)
+		o.log.Infof("%s react start steps=%d skills=%s", traceLogPrefix, o.reactMaxStep, strings.Join(names, ","))
+		o.log.Debugf("%s react selected_skills_doc=%q", traceLogPrefix, selectedSkillsDoc)
+		o.log.Debugf("%s react tools_doc=%q", traceLogPrefix, toolDoc)
 	}
 
 	systemPrompt := strings.Join([]string{
@@ -176,8 +228,8 @@ func (o *Orchestrator) runReAct(ctx context.Context, compressedContext, userInpu
 	scratchpad := ""
 	for step := 1; step <= o.reactMaxStep; step++ {
 		if o.log != nil {
-			o.log.Infof("react step start step=%d/%d", step, o.reactMaxStep)
-			o.log.Debugf("react scratchpad_before step=%d content=%q", step, scratchpad)
+			o.log.Infof("%s react step start step=%d/%d", traceLogPrefix, step, o.reactMaxStep)
+			o.log.Debugf("%s react scratchpad_before step=%d content=%q", traceLogPrefix, step, scratchpad)
 		}
 		prompt := strings.Join([]string{
 			"历史上下文：",
@@ -197,17 +249,18 @@ func (o *Orchestrator) runReAct(ctx context.Context, compressedContext, userInpu
 			return "", err
 		}
 		if o.log != nil {
-			o.log.Infof("react step llm output step=%d raw=%q", step, raw)
+			o.log.Infof("%s react step llm output step=%d raw=%q", traceLogPrefix, step, raw)
 		}
 		decision, err := parseDecision(raw)
 		if err != nil {
 			if o.log != nil {
-				o.log.Warnf("react parse failed, use raw as final err=%v", err)
+				o.log.Warnf("%s react parse failed, fallback to direct llm err=%v", traceLogPrefix, err)
 			}
-			return strings.TrimSpace(raw), nil
+			o.emitCapabilityGap(chatID, userID, userInput, "react_parse_failed")
+			return o.runDirectLLM(ctx, compressedContext, userInput)
 		}
 		if o.log != nil {
-			o.log.Infof("react step decision step=%d thought=%q action=%q action_input=%q final=%q", step, decision.Thought, decision.Action, decision.ActionInput, decision.Final)
+			o.log.Infof("%s react step decision step=%d thought=%q action=%q action_input=%q final=%q", traceLogPrefix, step, decision.Thought, decision.Action, decision.ActionInput, decision.Final)
 		}
 
 		action := strings.ToLower(strings.TrimSpace(decision.Action))
@@ -221,7 +274,7 @@ func (o *Orchestrator) runReAct(ctx context.Context, compressedContext, userInpu
 				finalText = "我已完成思考，但当前没有足够信息给出稳定结论。"
 			}
 			if o.log != nil {
-				o.log.Infof("react final step=%d final=%q", step, finalText)
+				o.log.Infof("%s react final step=%d final=%q", traceLogPrefix, step, finalText)
 			}
 			return finalText, nil
 		}
@@ -229,37 +282,45 @@ func (o *Orchestrator) runReAct(ctx context.Context, compressedContext, userInpu
 		tool, ok := o.tools.Get(action)
 		if !ok {
 			if o.log != nil {
-				o.log.Warnf("react step tool missing step=%d tool=%s", step, action)
+				o.log.Warnf("%s react step tool missing step=%d tool=%s", traceLogPrefix, step, action)
 			}
-			scratchpad += fmt.Sprintf("Step %d Thought: %s\nStep %d Observation: tool %s 不存在\n", step, decision.Thought, step, action)
+			scratchpad += "Step " + strconv.Itoa(step) + " Thought: " + decision.Thought + "\n"
+			scratchpad += "Step " + strconv.Itoa(step) + " Observation: " + formatToolErrorObservation("TOOL_NOT_FOUND", action, "tool not found") + "\n"
+			o.emitCapabilityGap(chatID, userID, userInput, "tool_not_found:"+action)
 			continue
 		}
 
 		toolOut, toolErr := tool.Call(ctx, decision.ActionInput)
 		if o.log != nil {
-			o.log.Infof("react step tool call step=%d tool=%s input=%q", step, action, decision.ActionInput)
+			o.log.Infof("%s react step tool call step=%d tool=%s input=%q", traceLogPrefix, step, action, decision.ActionInput)
 		}
 		obs := strings.TrimSpace(toolOut)
 		if obs == "" {
 			obs = "(empty output)"
 		}
 		if toolErr != nil {
-			obs = obs + "\nERROR: " + toolErr.Error()
+			obs = formatToolErrorObservation("TOOL_EXEC_ERROR", action, toolErr.Error()) + "\nOUTPUT:\n" + obs
+			o.emitCapabilityGap(chatID, userID, userInput, "tool_call_failed:"+action)
 		}
 		if o.log != nil {
-			o.log.Infof("react step observation step=%d tool=%s observation=%q", step, action, obs)
+			o.log.Infof("%s react step observation step=%d tool=%s observation=%q", traceLogPrefix, step, action, obs)
 		}
 		if len(obs) > 2000 {
 			obs = obs[:2000]
 		}
-		scratchpad += fmt.Sprintf("Step %d Thought: %s\nStep %d Action: %s\nStep %d ActionInput: %s\nStep %d Observation: %s\n", step, decision.Thought, step, action, step, decision.ActionInput, step, obs)
+		scratchpad += "Step " + strconv.Itoa(step) + " Thought: " + decision.Thought + "\n"
+		scratchpad += "Step " + strconv.Itoa(step) + " Action: " + action + "\n"
+		scratchpad += "Step " + strconv.Itoa(step) + " ActionInput: " + decision.ActionInput + "\n"
+		scratchpad += "Step " + strconv.Itoa(step) + " Observation: " + obs + "\n"
 	}
 
+	o.emitCapabilityGap(chatID, userID, userInput, "react_step_exhausted")
 	return "我尝试了多轮思考与工具调用，但仍未得到稳定结论。请给我更具体的约束或允许我继续尝试。", nil
 }
 
 func (o *Orchestrator) matchSkills(ctx context.Context, compressedContext, userInput string) []knowledge.Skill {
-	if len(o.skills) == 0 {
+	skills := o.getSkillsSnapshot()
+	if len(skills) == 0 {
 		return nil
 	}
 
@@ -277,7 +338,7 @@ func (o *Orchestrator) matchSkills(ctx context.Context, compressedContext, userI
 
 	userPrompt := strings.Join([]string{
 		"候选技能：",
-		formatSkillCatalog(o.skills),
+		formatSkillCatalog(skills),
 		"",
 		"历史上下文：",
 		compressedContext,
@@ -316,7 +377,7 @@ func (o *Orchestrator) matchSkills(ctx context.Context, compressedContext, userI
 		if _, ok := seen[name]; ok {
 			continue
 		}
-		for _, skill := range o.skills {
+		for _, skill := range skills {
 			if strings.ToLower(strings.TrimSpace(skill.Name)) == name {
 				picked = append(picked, skill)
 				seen[name] = struct{}{}
@@ -338,28 +399,132 @@ func (o *Orchestrator) matchSkills(ctx context.Context, compressedContext, userI
 	return picked
 }
 
-func parseDecision(raw string) (reactDecision, error) {
-	raw = normalizeJSON(raw)
-	start := strings.Index(raw, "{")
-	end := strings.LastIndex(raw, "}")
-	if start < 0 || end < start {
-		return reactDecision{}, fmt.Errorf("no json object found")
+// emitCapabilityGap persists one capability-gap event (when the feature is enabled), then drafts
+// a skill, hot-reloading on creation, for each recent gap cluster at or above the trigger count.
+func (o *Orchestrator) emitCapabilityGap(chatID, userID, intent, reason string) {
+	intent, reason = strings.TrimSpace(intent), strings.TrimSpace(reason)
+	if !o.enableCapabilityGap || intent == "" || reason == "" {
+		return
+	}
+	if len(intent) > 1000 {
+		intent = strings.ToValidUTF8(intent[:1000], "") // drop a rune split by the byte cut
+	}
+	if len(reason) > 240 {
+		reason = strings.ToValidUTF8(reason[:240], "") // same UTF-8-safe byte cap
+	}
+	if err := o.store.SaveCapabilityGap(chatID, userID, intent, reason); err != nil {
+		if o.log != nil {
+			o.log.Warnf("save capability gap failed chat_id=%s user_id=%s err=%v", chatID, userID, err)
+		}
+		return
 	}
-	raw = raw[start : end+1]
 
-	var out reactDecision
-	if err := json.Unmarshal([]byte(raw), &out); err != nil {
-		return reactDecision{}, err
+	clusters, err := o.store.TopCapabilityGapClusters(20, time.Now().UTC().Add(-o.gapLookbackDuration))
+	if err != nil {
+		if o.log != nil {
+			o.log.Warnf("query capability gap clusters failed err=%v", err)
+		}
+		return
+	}
+	for _, c := range clusters {
+		if c.Count < o.gapDraftTriggerCount {
+			continue
+		}
+		path, created, draftErr := knowledge.GenerateSkillDraft(c, o.autoSkillDir)
+		if draftErr != nil {
+			if o.log != nil {
+				o.log.Warnf("generate skill draft failed intent_key=%s reason=%s err=%v", c.IntentKey, c.Reason, draftErr)
+			}
+			continue
+		}
+		if created && o.log != nil {
+			o.log.Infof("capability gap draft generated path=%s intent_key=%s reason=%s count=%d", path, c.IntentKey, c.Reason, c.Count)
+		}
+		if created {
+			if reloadErr := o.ReloadSkills(); reloadErr != nil && o.log != nil {
+				o.log.Warnf("auto reload skills failed after generation path=%s err=%v", path, reloadErr)
+			}
+		}
 	}
-	return out, nil
 }
 
-func normalizeJSON(raw string) string {
-	raw = strings.TrimSpace(raw)
-	raw = strings.TrimPrefix(raw, "```json")
-	raw = strings.TrimPrefix(raw, "```")
-	raw = strings.TrimSuffix(raw, "```")
-	return strings.TrimSpace(raw)
+func (o *Orchestrator) ReloadSkills() error {
+	skills, err := knowledge.LoadSkillSet(o.skillsDir)
+	if err != nil {
+		return err
+	}
+	o.skillsMu.Lock()
+	o.skills = skills
+	o.skillsMu.Unlock()
+	if o.log != nil {
+		o.log.Infof("skills hot reloaded count=%d dir=%s", len(skills), o.skillsDir)
+	}
+	return nil
+}
+
+// getSkillsSnapshot returns a private copy of the current skill list, taken under the
+// read lock so callers can iterate it without holding skillsMu.
+func (o *Orchestrator) getSkillsSnapshot() []knowledge.Skill {
+	o.skillsMu.RLock()
+	defer o.skillsMu.RUnlock()
+	return append(make([]knowledge.Skill, 0, len(o.skills)), o.skills...)
+}
+
+// BuildCapabilityGapReport formats the gap clusters returned by the store's
+// TopCapabilityGapClusters(limit, now-lookback) query as a numbered list followed by the
+// draft directory; it returns a fixed notice when there are none and propagates query errors.
+func (o *Orchestrator) BuildCapabilityGapReport(limit int) (string, error) {
+	clusters, err := o.store.TopCapabilityGapClusters(limit, time.Now().UTC().Add(-o.gapLookbackDuration))
+	if err != nil {
+		return "", err
+	}
+	if len(clusters) == 0 {
+		return "最近没有采集到能力缺口记录。", nil
+	}
+	var report strings.Builder
+	report.WriteString("高频能力缺口清单：\n")
+	for idx, gap := range clusters {
+		fmt.Fprintf(&report, "%d) intent=%s | reason=%s | count=%d | last_seen=%s\n", idx+1, gap.IntentKey, gap.Reason, gap.Count, gap.LastSeenAt.Format("2006-01-02 15:04:05"))
+	}
+	report.WriteString("\n草稿目录：" + o.autoSkillDir + "\n系统会在达到阈值后自动生成并热加载技能；你也可以手动发送 /reload_skills。")
+	return report.String(), nil
+}
+
+// findSkillByKeyword returns the first loaded skill whose name or content contains any of
+// the keywords (trimmed, case-insensitive; blank keywords are ignored), or false if none does.
+func (o *Orchestrator) findSkillByKeyword(keywords ...string) (knowledge.Skill, bool) {
+	needles := make([]string, 0, len(keywords))
+	for _, kw := range keywords {
+		if kw = strings.ToLower(strings.TrimSpace(kw)); kw != "" {
+			needles = append(needles, kw)
+		}
+	}
+	for _, s := range o.getSkillsSnapshot() {
+		name := strings.ToLower(strings.TrimSpace(s.Name))
+		content := strings.ToLower(strings.TrimSpace(s.Content))
+		for _, kw := range needles {
+			if strings.Contains(name, kw) || strings.Contains(content, kw) {
+				return s, true
+			}
+		}
+	}
+	return knowledge.Skill{}, false
+}
+
+func formatToolErrorObservation(code, action, reason string) string {
+	code = strings.TrimSpace(code)
+	action = strings.TrimSpace(action)
+	reason = strings.TrimSpace(reason)
+	if code == "" {
+		code = "TOOL_EXEC_ERROR"
+	}
+	if action == "" {
+		action = "unknown"
+	}
+	if reason == "" {
+		reason = "unknown error"
+	}
+	return "ERROR_CODE=" + code + "; TOOL=" + action + "; REASON=" + reason
 }
 
 func formatSkills(skills []knowledge.Skill) string {
diff --git a/internal/agent/react_parser.go b/internal/agent/react_parser.go
new file mode 100644
index 0000000..d15afd6
--- /dev/null
+++ b/internal/agent/react_parser.go
@@ -0,0 +1,31 @@
+package agent
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+)
+
+func parseDecision(raw string) (reactDecision, error) {
+	raw = normalizeJSON(raw)
+	start := strings.Index(raw, "{")
+	end := strings.LastIndex(raw, "}")
+	if start < 0 || end < start {
+		return reactDecision{}, fmt.Errorf("no json object found")
+	}
+	raw = raw[start : end+1]
+
+	var out reactDecision
+	if err := json.Unmarshal([]byte(raw), &out); err != nil {
+		return reactDecision{}, err
+	}
+	return out, nil
+}
+
+func normalizeJSON(raw string) string {
+	raw = strings.TrimSpace(raw)
+	raw = strings.TrimPrefix(raw, "```json")
+	raw = strings.TrimPrefix(raw, "```")
+	raw = strings.TrimSuffix(raw, "```")
+	return strings.TrimSpace(raw)
+}
diff --git a/internal/agent/react_parser_test.go b/internal/agent/react_parser_test.go
new file mode 100644
index 0000000..a36efdd
--- /dev/null
+++ b/internal/agent/react_parser_test.go
@@ -0,0 +1,32 @@
+package agent
+
+import "testing"
+
+func TestParseDecisionPlainJSON(t *testing.T) { // a bare JSON object with no code fence must decode
+	raw := `{"thought":"t","action":"none","action_input":"","final":"ok"}`
+	got, err := parseDecision(raw)
+	if err != nil {
+		t.Fatalf("parseDecision error: %v", err)
+	}
+	if got.Action != "none" || got.Final != "ok" { // only Action and Final are asserted here
+		t.Fatalf("unexpected decision: %+v", got)
+	}
+}
+
+func TestParseDecisionCodeFence(t *testing.T) { // the same payload wrapped in a ```json fence must still decode
+	raw := "```json\n{\"thought\":\"t\",\"action\":\"shell\",\"action_input\":\"ls\",\"final\":\"\"}\n```"
+	got, err := parseDecision(raw)
+	if err != nil {
+		t.Fatalf("parseDecision error: %v", err)
+	}
+	if got.Action != "shell" || got.ActionInput != "ls" { // checks the tool name and its input
+		t.Fatalf("unexpected decision: %+v", got)
+	}
+}
+
+func TestParseDecisionInvalid(t *testing.T) { // input without any {...} span must produce an error
+	_, err := parseDecision("not json")
+	if err == nil {
+		t.Fatal("expected parse error")
+	}
+}
diff --git a/internal/config/config.go b/internal/config/config.go
index b0552d6..f31ede6 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -16,6 +16,12 @@ type Config struct {
 	SoulPath       string
 	SkillsDir      string
 	ReactMaxSteps  int
+	ToolCallTimeoutSec int
+	ToolOutputMaxChars int
+	EnableCapabilityGap bool
+	AutoSkillDir             string
+	GapDraftTriggerCount     int
+	GapClusterLookbackHours  int
 
 	Telegram TelegramConfig
 	Feishu   FeishuConfig
@@ -51,16 +57,25 @@ type SecurityConfig struct {
 }
 
 func Load() (Config, error) {
+	agentWorkspaceDir := resolveAgentWorkspaceDir()
 	if err := preloadEnvFiles(); err != nil {
 		return Config{}, err
 	}
+	defaultWorkSubdir := filepath.Join(agentWorkspaceDir, "workspace")
+	defaultDataDir := filepath.Join(agentWorkspaceDir, "data")
 
 	cfg := Config{
 		MessageChannel: defaultIfEmpty(os.Getenv("MESSAGE_CHANNEL"), "telegram"),
 		LogLevel:       defaultIfEmpty(os.Getenv("LOG_LEVEL"), "info"),
-		SoulPath:       defaultIfEmpty(os.Getenv("SOUL_PATH"), "./bot_context/soul.md"),
-		SkillsDir:      defaultIfEmpty(os.Getenv("SKILLS_DIR"), "./skills"),
-		ReactMaxSteps:  intFromEnv("REACT_MAX_STEPS", 4),
+		SoulPath:       defaultIfEmpty(os.Getenv("SOUL_PATH"), filepath.Join(agentWorkspaceDir, "bot_context", "soul.md")),
+		SkillsDir:      defaultIfEmpty(os.Getenv("SKILLS_DIR"), filepath.Join(agentWorkspaceDir, "skills")),
+		ReactMaxSteps:  intFromEnv("REACT_MAX_STEPS", 4), // default must satisfy the 1..8 check in Load; 0 made Load fail when unset
+		ToolCallTimeoutSec: intFromEnv("TOOL_CALL_TIMEOUT_SEC", 15),
+		ToolOutputMaxChars: intFromEnv("TOOL_OUTPUT_MAX_CHARS", 4000),
+		EnableCapabilityGap: boolFromEnv("ENABLE_CAPABILITY_GAP", true),
+		AutoSkillDir:            defaultIfEmpty(os.Getenv("AUTO_SKILL_DIR"), filepath.Join(agentWorkspaceDir, "skills")),
+		GapDraftTriggerCount:    intFromEnv("GAP_DRAFT_TRIGGER_COUNT", 3),
+		GapClusterLookbackHours: intFromEnv("GAP_CLUSTER_LOOKBACK_HOURS", 168),
 		Telegram: TelegramConfig{
 			Token:              strings.TrimSpace(os.Getenv("TELEGRAM_BOT_TOKEN")),
 			PollTimeoutSeconds: intFromEnv("TELEGRAM_POLL_TIMEOUT_SECONDS", 30),
@@ -77,11 +92,11 @@ func Load() (Config, error) {
 			APIKey:  strings.TrimSpace(os.Getenv("LLM_API_KEY")),
 			Model:   defaultIfEmpty(os.Getenv("LLM_MODEL"), "gpt-4o-mini"),
 		},
-		SQLitePath: defaultIfEmpty(os.Getenv("SQLITE_PATH"), "./data/laodingbot.db"),
+		SQLitePath: defaultIfEmpty(os.Getenv("SQLITE_PATH"), filepath.Join(defaultDataDir, "laodingbot.db")),
 		Security: SecurityConfig{
-			AllowedDirs:     splitCSV(defaultIfEmpty(os.Getenv("ALLOWED_DIRS"), "./workspace,./data")),
-			AllowedCommands: splitCSV(defaultIfEmpty(os.Getenv("ALLOWED_COMMANDS"), "pwd,ls,cat,echo,grep,find,head,tail")),
-			WorkDir:         defaultIfEmpty(os.Getenv("WORK_DIR"), "./workspace"),
+			AllowedDirs:     splitCSV(defaultIfEmpty(os.Getenv("ALLOWED_DIRS"), strings.Join([]string{agentWorkspaceDir, defaultDataDir, defaultWorkSubdir}, ","))),
+			AllowedCommands: splitCSV(defaultIfEmpty(os.Getenv("ALLOWED_COMMANDS"), "pwd,ls,cat,echo,grep,find,head,tail,go")),
+			WorkDir:         defaultIfEmpty(os.Getenv("WORK_DIR"), defaultWorkSubdir),
 		},
 	}
 
@@ -96,6 +111,18 @@ func Load() (Config, error) {
 	if cfg.ReactMaxSteps < 1 || cfg.ReactMaxSteps > 8 {
 		return Config{}, fmt.Errorf("REACT_MAX_STEPS must be between 1 and 8")
 	}
+	if cfg.ToolCallTimeoutSec < 1 || cfg.ToolCallTimeoutSec > 300 {
+		return Config{}, fmt.Errorf("TOOL_CALL_TIMEOUT_SEC must be between 1 and 300")
+	}
+	if cfg.ToolOutputMaxChars < 256 || cfg.ToolOutputMaxChars > 200000 {
+		return Config{}, fmt.Errorf("TOOL_OUTPUT_MAX_CHARS must be between 256 and 200000")
+	}
+	if cfg.GapDraftTriggerCount < 1 || cfg.GapDraftTriggerCount > 100 {
+		return Config{}, fmt.Errorf("GAP_DRAFT_TRIGGER_COUNT must be between 1 and 100")
+	}
+	if cfg.GapClusterLookbackHours < 1 || cfg.GapClusterLookbackHours > 24*365 {
+		return Config{}, fmt.Errorf("GAP_CLUSTER_LOOKBACK_HOURS must be between 1 and 8760")
+	}
 
 	if cfg.MessageChannel == "telegram" {
 		if cfg.Telegram.Token == "" {
@@ -119,28 +146,152 @@ func Load() (Config, error) {
 		return Config{}, fmt.Errorf("LLM_API_KEY is required")
 	}
 
+	cfg.SoulPath = resolvePathInWorkspace(cfg.SoulPath, agentWorkspaceDir)
+	cfg.SkillsDir = resolvePathInWorkspace(cfg.SkillsDir, agentWorkspaceDir)
+	cfg.AutoSkillDir = resolvePathInWorkspace(cfg.AutoSkillDir, agentWorkspaceDir)
+	cfg.SQLitePath = resolvePathInWorkspace(cfg.SQLitePath, agentWorkspaceDir)
+	cfg.Security.WorkDir = resolvePathInWorkspace(cfg.Security.WorkDir, agentWorkspaceDir)
+	cfg.Security.AllowedDirs = resolveDirsInWorkspace(cfg.Security.AllowedDirs, agentWorkspaceDir)
+	cfg.Security.AllowedDirs = ensureAllowedDirs(cfg.Security.AllowedDirs,
+		filepath.Clean(agentWorkspaceDir),
+		filepath.Join(agentWorkspaceDir, "skills"),
+		filepath.Join(agentWorkspaceDir, "data"),
+		filepath.Join(agentWorkspaceDir, "workspace"),
+	)
+	cfg.Security.AllowedCommands = ensureAllowedCommands(cfg.Security.AllowedCommands, "go", "curl", "curl.exe")
+
 	return cfg, nil
 }
 
 func preloadEnvFiles() error {
-	paths := []string{}
-	if explicit := strings.TrimSpace(os.Getenv("CONFIG_ENV_FILE")); explicit != "" {
-		paths = append(paths, explicit)
-	}
-	paths = append(paths, "configs/env", ".env")
-
-	for _, p := range paths {
-		if err := loadEnvFile(p); err != nil {
-			if errors.Is(err, os.ErrNotExist) {
-				continue
-			}
-			return fmt.Errorf("load env file %s failed: %w", p, err)
+	explicit := strings.TrimSpace(os.Getenv("CONFIG_ENV_FILE")) // step 1: the file named by CONFIG_ENV_FILE, if set
+	if explicit != "" {
+		if err := tryLoadEnvFile(explicit, true); err != nil { // override=true: its values replace variables that are already set
+			return fmt.Errorf("load env file %s failed: %w", explicit, err)
 		}
 	}
+
+	workspaceDir := resolveAgentWorkspaceDir() // step 2: <workspace>/configs/env, then <workspace>/.env
+	workspaceEnv := filepath.Join(workspaceDir, "configs", "env")
+	workspaceDotEnv := filepath.Join(workspaceDir, ".env")
+	if err := tryLoadEnvFile(workspaceEnv, true); err != nil { // also override=true, so a later file wins over an earlier one
+		return fmt.Errorf("load env file %s failed: %w", workspaceEnv, err)
+	}
+	if err := tryLoadEnvFile(workspaceDotEnv, true); err != nil {
+		return fmt.Errorf("load env file %s failed: %w", workspaceDotEnv, err)
+	}
+
+	if err := tryLoadEnvFile("configs/env", false); err != nil { // step 3: cwd-relative files; override=false only fills unset variables
+		return fmt.Errorf("load env file %s failed: %w", "configs/env", err)
+	}
+	if err := tryLoadEnvFile(".env", false); err != nil { // a missing file is skipped silently in every step (see tryLoadEnvFile)
+		return fmt.Errorf("load env file %s failed: %w", ".env", err)
+	}
 	return nil
 }
 
-func loadEnvFile(path string) error {
+// tryLoadEnvFile calls loadEnvFile and reports a not-exist error as success.
+func tryLoadEnvFile(path string, override bool) error {
+	if err := loadEnvFile(path, override); !errors.Is(err, os.ErrNotExist) {
+		return err
+	}
+	return nil
+}
+
+// resolveAgentWorkspaceDir returns AGENT_WORKSPACE_DIR (default ./workspace/agent_runtime) as an absolute path, or the raw value if that fails.
+func resolveAgentWorkspaceDir() string {
+	dir := strings.TrimSpace(os.Getenv("AGENT_WORKSPACE_DIR"))
+	if dir == "" {
+		dir = filepath.Join(".", "workspace", "agent_runtime")
+	}
+	if abs, err := filepath.Abs(dir); err == nil {
+		return abs
+	}
+	return dir
+}
+
+// resolvePathInWorkspace cleans path, anchoring a relative path at workspaceDir; a blank path yields "".
+func resolvePathInWorkspace(path, workspaceDir string) string {
+	if path = strings.TrimSpace(path); path == "" {
+		return path
+	}
+	if !filepath.IsAbs(path) {
+		path = filepath.Join(workspaceDir, path)
+	}
+	return filepath.Clean(path)
+}
+
+// resolveDirsInWorkspace resolves each non-blank entry via resolvePathInWorkspace; if none remain, the cleaned workspaceDir is the only entry.
+func resolveDirsInWorkspace(dirs []string, workspaceDir string) []string {
+	resolved := make([]string, 0, len(dirs))
+	for _, dir := range dirs {
+		if p := resolvePathInWorkspace(dir, workspaceDir); strings.TrimSpace(p) != "" {
+			resolved = append(resolved, p)
+		}
+	}
+	if len(resolved) > 0 {
+		return resolved
+	}
+	return append(resolved, filepath.Clean(workspaceDir))
+}
+
+// ensureAllowedDirs merges existing and dirs into one allow-list. Every entry is
+// whitespace-trimmed and passed through filepath.Clean; duplicates are dropped and
+// first-seen order is kept, with existing entries ahead of dirs.
+//
+// Blank entries are skipped before cleaning: filepath.Clean("") returns ".", so
+// cleaning first would quietly add the current directory to the allow-list and
+// make the emptiness check unreachable.
+func ensureAllowedDirs(existing []string, dirs ...string) []string {
+	seen := make(map[string]struct{}, len(existing)+len(dirs))
+	out := make([]string, 0, len(existing)+len(dirs))
+
+	for _, group := range [][]string{existing, dirs} {
+		for _, d := range group {
+			d = strings.TrimSpace(d)
+			if d == "" {
+				continue
+			}
+			clean := filepath.Clean(d)
+			if _, dup := seen[clean]; dup {
+				continue
+			}
+			seen[clean] = struct{}{}
+			out = append(out, clean)
+		}
+	}
+	return out
+}
+
+// ensureAllowedCommands returns the command allow-list formed from existing followed
+// by commands. Names are whitespace-trimmed and lower-cased; blank names and repeats
+// are dropped, so each command appears exactly once, at the position where it was
+// first seen.
+func ensureAllowedCommands(existing []string, commands ...string) []string {
+	total := len(existing) + len(commands)
+	merged := make([]string, 0, total)
+	seen := map[string]struct{}{}
+
+	// add appends one normalized name unless it is blank or already present.
+	add := func(name string) {
+		key := strings.ToLower(strings.TrimSpace(name))
+		if _, dup := seen[key]; dup || key == "" {
+			return
+		}
+		seen[key] = struct{}{}
+		merged = append(merged, key)
+	}
+	for _, name := range existing {
+		add(name)
+	}
+	for _, name := range commands {
+		add(name)
+	}
+
+	return merged
+}
+
+func loadEnvFile(path string, override bool) error {
 	absPath, err := filepath.Abs(path)
 	if err != nil {
 		absPath = path
@@ -173,11 +324,14 @@ func loadEnvFile(path string) error {
 				val = val[1 : len(val)-1]
 			}
 		}
-		if _, exists := os.LookupEnv(key); !exists {
-			if err := os.Setenv(key, val); err != nil {
-				return err
+		if !override {
+			if _, exists := os.LookupEnv(key); exists {
+				continue
 			}
 		}
+		if err := os.Setenv(key, val); err != nil {
+			return err
+		}
 	}
 	return scanner.Err()
 }
@@ -202,6 +356,20 @@ func intFromEnv(name string, d int) int {
 	return v
 }
 
+// boolFromEnv reads environment variable name as a boolean. After trimming and
+// lower-casing, 1/true/yes/on mean true and 0/false/no/off mean false; an unset,
+// blank or unrecognized value yields the default d.
+func boolFromEnv(name string, d bool) bool {
+	switch strings.ToLower(strings.TrimSpace(os.Getenv(name))) {
+	case "1", "true", "yes", "on":
+		return true
+	case "0", "false", "no", "off":
+		return false
+	default:
+		return d
+	}
+}
+
 func splitCSV(raw string) []string {
 	parts := strings.Split(raw, ",")
 	out := make([]string, 0, len(parts))
diff --git a/internal/knowledge/drafts.go b/internal/knowledge/drafts.go
new file mode 100644
index 0000000..44654ed
--- /dev/null
+++ b/internal/knowledge/drafts.go
@@ -0,0 +1,141 @@
+package knowledge
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"laodingbot/internal/memory"
+)
+
+// GenerateSkillDraft writes a skill.md draft for cluster under
+// <draftRoot>/auto_<slug>/ (draftRoot defaults to "./skills"). It returns the file
+// path and whether the file was created; an existing draft is left untouched.
+func GenerateSkillDraft(cluster memory.CapabilityGapCluster, draftRoot string) (string, bool, error) {
+	draftRoot = strings.TrimSpace(draftRoot)
+	if draftRoot == "" {
+		draftRoot = "./skills"
+	}
+	if err := os.MkdirAll(draftRoot, 0o755); err != nil {
+		return "", false, err
+	}
+	skillDirName := "auto_gap_skill" // fallback; test the slug itself, since "auto_"+slug is never empty
+	if slug := slugFromIntent(cluster.IntentKey); slug != "" {
+		skillDirName = "auto_" + slug
+	}
+	dir := filepath.Join(draftRoot, skillDirName)
+	file := filepath.Join(dir, "skill.md")
+	if _, err := os.Stat(file); err == nil { // draft already exists: report it without rewriting
+		return file, false, nil
+	}
+	if err := os.MkdirAll(dir, 0o755); err != nil {
+		return "", false, err
+	}
+	skillTitle := titleFromIntent(cluster.SampleIntent)
+	if skillTitle == "" {
+		skillTitle = "能力缺口补全技能"
+	}
+	content := buildDraftMarkdown(skillTitle, cluster)
+	if err := os.WriteFile(file, []byte(content), 0o644); err != nil {
+		return "", false, err
+	}
+	return file, true, nil
+}
+
+func buildDraftMarkdown(skillTitle string, cluster memory.CapabilityGapCluster) string { // renders the skill.md draft: front matter plus fixed guidance sections
+	createdAt := time.Now().Format(time.RFC3339) // local time in RFC 3339 form, written to generated_at
+	return strings.TrimSpace(fmt.Sprintf(`---
+name: %s
+description: 由 capability_gap 自动生成并用于自动补全缺失能力。
+source: capability_gap
+generated_at: %s
+cluster_intent_key: %s
+cluster_reason: %s
+cluster_count: %d
+---
+
+# Skill: %s
+
+## 背景
+- 该技能由系统根据高频能力缺口自动生成并已纳入技能目录。
+- 最近高频缺口聚类：`+"`%s`"+`
+- 缺口原因：`+"`%s`"+`
+- 出现次数：`+"`%d`"+`
+
+## 目标能力
+- 明确该类问题应如何判断是否需要调用工具。
+- 约束输入输出，避免泛化过度。
+- 在失败时提供可操作回退路径。
+
+## 建议触发信号
+- 用户提问与下述意图高度相关：`+"`%s`"+`
+- 现有技能未命中，或命中后无法完成。
+
+## 建议工具
+- 优先使用现有工具：`+"`shell`"+`、`+"`file`"+`。
+- 若能力不足，需要创建新工具时：
+	1. 在 `+"`internal/tools/<tool_name>/`"+` 下生成 Go 代码；
+	2. 在 `+"`cmd/bot/main.go`"+` 或 toolhost 注册逻辑中完成注册；
+	3. 生成/补充 `+"`*_test.go`"+`；
+	4. 调用 `+"`go test ./...`"+` 验证。
+
+## ReAct 指南
+1. 先确认用户目标和输入约束。
+2. 判断是否可直接回答；若不行，再选择工具。
+3. 工具调用前先最小化探测范围。
+4. 工具失败时输出原因与下一步建议。
+5. 若缺少 skill：使用 `+"`file`"+` 与 `+"`shell`"+` 创建新的 `+"`skills/<skill_name>/skill.md`"+`。
+6. 若缺少 tool：生成工具代码与测试后执行 `+"`go test ./...`"+`。
+
+## 输出规范
+- 结论：一句话给出当前阶段结论。
+- 依据：列出关键观察与证据。
+- 限制：说明当前不确定性。
+- 下一步：给用户可执行动作。
+`, skillTitle, createdAt, cluster.IntentKey, cluster.Reason, cluster.Count, skillTitle, cluster.IntentKey, cluster.Reason, cluster.Count, cluster.SampleIntent)) // args follow the verbs top to bottom; NOTE(review): values are inserted into the front matter unescaped
+}
+
+// slugFromIntent turns intent into a directory-safe slug: trimmed, lower-cased,
+// keeping only a-z, 0-9 and CJK ideographs (U+4E00..U+9FFF), with every run of
+// other characters collapsed to a single "-".
+// The result is capped at 48 runes and never starts or ends with "-"; "" is
+// returned when nothing usable remains.
+func slugFromIntent(intent string) string {
+	const maxRunes = 48
+	var b strings.Builder
+	lastDash := false
+	for _, r := range strings.ToLower(strings.TrimSpace(intent)) {
+		isAlphaNum := (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9')
+		isCJK := r >= 0x4e00 && r <= 0x9fff
+		switch {
+		case isAlphaNum || isCJK:
+			b.WriteRune(r)
+			lastDash = false
+		case !lastDash:
+			b.WriteRune('-')
+			lastDash = true
+		}
+	}
+
+	runes := []rune(strings.Trim(b.String(), "-"))
+	if len(runes) > maxRunes {
+		runes = runes[:maxRunes]
+	}
+	// Trim once more: cutting at the cap can land right after a separator and
+	// would otherwise leave a dangling "-" at the end of the directory name.
+	return strings.Trim(string(runes), "-")
+}
+
+// titleFromIntent derives a short skill title from intent: surrounding whitespace
+// is trimmed and the text is cut to its first 32 runes when longer. Shorter text
+// is returned as trimmed, and blank input yields "".
+// The limit counts runes, so multi-byte characters are never split.
+func titleFromIntent(intent string) string {
+	title := strings.TrimSpace(intent)
+	if runes := []rune(title); len(runes) > 32 {
+		title = string(runes[:32])
+	}
+	return title
+}
diff --git a/internal/knowledge/drafts_test.go b/internal/knowledge/drafts_test.go
new file mode 100644
index 0000000..7c286ff
--- /dev/null
+++ b/internal/knowledge/drafts_test.go
@@ -0,0 +1,37 @@
+package knowledge
+
+import (
+	"path/filepath"
+	"testing"
+
+	"laodingbot/internal/memory"
+)
+
+func TestGenerateSkillDraftCreatesFile(t *testing.T) { // first call creates skill.md, second call reports it as already present
+	draftDir := filepath.Join(t.TempDir(), "drafts") // not created up front; GenerateSkillDraft must create it
+	cluster := memory.CapabilityGapCluster{
+		IntentKey:    "query files in workspace",
+		SampleIntent: "帮我查询 workspace 目录下的 markdown 文件",
+		Reason:       "no_skill_matched",
+		Count:        4,
+	}
+
+	path, created, err := GenerateSkillDraft(cluster, draftDir)
+	if err != nil {
+		t.Fatalf("GenerateSkillDraft error: %v", err)
+	}
+	if !created {
+		t.Fatalf("expected created=true")
+	}
+	if filepath.Base(path) != "skill.md" {
+		t.Fatalf("expected skill.md path, got %s", path)
+	}
+
+	_, created2, err := GenerateSkillDraft(cluster, draftDir) // same cluster and directory again
+	if err != nil {
+		t.Fatalf("GenerateSkillDraft second call error: %v", err)
+	}
+	if created2 {
+		t.Fatalf("expected created=false on second call")
+	}
+}
diff --git a/internal/knowledge/loader.go b/internal/knowledge/loader.go
index d1034a6..9139287 100644
--- a/internal/knowledge/loader.go
+++ b/internal/knowledge/loader.go
@@ -26,28 +26,6 @@ func LoadSoul(path string) (string, error) {
 	return content, nil
 }
 
-func LoadSkills(dir string) (string, error) {
-	skills, err := LoadSkillSet(dir)
-	if err != nil {
-		return "", err
-	}
-
-	builder := strings.Builder{}
-	for _, skill := range skills {
-		builder.WriteString("## ")
-		builder.WriteString(skill.Name)
-		builder.WriteString("\n")
-		builder.WriteString(skill.Content)
-		builder.WriteString("\n\n")
-	}
-
-	out := strings.TrimSpace(builder.String())
-	if out == "" {
-		return "", fmt.Errorf("no non-empty markdown skills loaded from %s", dir)
-	}
-	return out, nil
-}
-
 func LoadSkillSet(dir string) ([]Skill, error) {
 	entries, err := os.ReadDir(dir)
 	if err != nil {
diff --git a/internal/logger/trace.go b/internal/logger/trace.go
new file mode 100644
index 0000000..8eb9e5d
--- /dev/null
+++ b/internal/logger/trace.go
@@ -0,0 +1,32 @@
+package logger
+
+import (
+	"context"
+	"fmt"
+	"math/rand"
+	"time"
+)
+
+type traceIDKey struct{} // context key under which WithTraceID stores the trace ID
+
+// NewTraceID returns "tr-<UTC Unix nanoseconds in hex>-<random int63 in hex>".
+func NewTraceID() string {
+	nanos, nonce := time.Now().UTC().UnixNano(), rand.Int63()
+	return fmt.Sprintf("tr-%x-%x", nanos, nonce)
+}
+
+func WithTraceID(ctx context.Context, traceID string) context.Context { // returns ctx carrying traceID; an empty traceID leaves ctx as is
+	if traceID != "" {
+		ctx = context.WithValue(ctx, traceIDKey{}, traceID)
+	}
+	return ctx
+}
+
+// TraceIDFromContext returns the trace ID stored under traceIDKey, or "" when ctx is nil or holds none.
+func TraceIDFromContext(ctx context.Context) string {
+	if ctx == nil {
+		return ""
+	}
+	id, _ := ctx.Value(traceIDKey{}).(string)
+	return id
+}
diff --git a/internal/memory/store_sqlite.go b/internal/memory/store_sqlite.go
index dd6868f..7c0c72d 100644
--- a/internal/memory/store_sqlite.go
+++ b/internal/memory/store_sqlite.go
@@ -5,6 +5,8 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"sort"
+	"strings"
 	"time"
 
 	"laodingbot/internal/logger"
@@ -110,6 +112,154 @@ func (s *SQLiteStore) LoadRecent(chatID string, limit int) ([]Message, error) {
 	return messages, nil
 }
 
+func (s *SQLiteStore) SaveCapabilityGap(chatID, userID, intent, reason string) error { // inserts one row into capability_gaps
+	_, err := s.db.Exec(`
+		INSERT INTO capability_gaps(chat_id, user_id, intent, reason, created_at)
+		VALUES (?, ?, ?, ?, ?)
+	`, chatID, userID, intent, reason, time.Now().UTC()) // created_at is the current UTC time
+	if err != nil && s.log != nil { // logging is skipped when no logger is set
+		s.log.Errorf("save capability gap failed chat_id=%s user_id=%s err=%v", chatID, userID, err)
+	}
+	return err // the failure is returned to the caller as well as logged
+}
+
+func (s *SQLiteStore) TopCapabilityGaps(limit int) ([]CapabilityGap, error) { // returns up to limit rows, newest first (ORDER BY id DESC), not a frequency ranking
+	if limit <= 0 {
+		limit = 20 // non-positive limit falls back to 20
+	}
+	rows, err := s.db.Query(`
+		SELECT id, chat_id, user_id, intent, reason, created_at
+		FROM capability_gaps
+		ORDER BY id DESC
+		LIMIT ?
+	`, limit)
+	if err != nil {
+		if s.log != nil {
+			s.log.Errorf("top capability gaps query failed err=%v", err)
+		}
+		return nil, err
+	}
+	defer rows.Close()
+
+	out := make([]CapabilityGap, 0, limit)
+	for rows.Next() {
+		var item CapabilityGap
+		if err := rows.Scan(&item.ID, &item.ChatID, &item.UserID, &item.Intent, &item.Reason, &item.CreatedAt); err != nil {
+			return nil, err // scan and iteration errors are returned without logging
+		}
+		out = append(out, item)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+func (s *SQLiteStore) TopCapabilityGapClusters(limit int, since time.Time) ([]CapabilityGapCluster, error) { // groups gaps recorded at or after since and returns the largest groups
+	if limit <= 0 {
+		limit = 20 // non-positive limit falls back to 20
+	}
+	if since.IsZero() {
+		since = time.Now().UTC().Add(-7 * 24 * time.Hour) // zero since means the last 7 days
+	}
+
+	rows, err := s.db.Query(`
+		SELECT intent, reason, created_at
+		FROM capability_gaps
+		WHERE created_at >= ?
+	`, since) // every row in the window is read; grouping happens in Go below
+	if err != nil {
+		if s.log != nil {
+			s.log.Errorf("top capability gap clusters query failed err=%v", err)
+		}
+		return nil, err
+	}
+	defer rows.Close()
+
+	type groupKey struct { // a cluster is identified by normalized intent plus trimmed reason
+		IntentKey string
+		Reason    string
+	}
+	type agg struct { // thin wrapper around the cluster being accumulated
+		cluster CapabilityGapCluster
+	}
+
+	groups := map[groupKey]agg{}
+	for rows.Next() {
+		var intent string
+		var reason string
+		var createdAt time.Time
+		if err := rows.Scan(&intent, &reason, &createdAt); err != nil {
+			return nil, err
+		}
+		intentKey := normalizeIntentKey(intent)
+		reason = strings.TrimSpace(reason)
+		k := groupKey{IntentKey: intentKey, Reason: reason}
+		current, ok := groups[k]
+		if !ok {
+			current = agg{cluster: CapabilityGapCluster{ // first row of a group supplies SampleIntent and the initial LastSeenAt
+				IntentKey:    intentKey,
+				SampleIntent: strings.TrimSpace(intent),
+				Reason:       reason,
+				Count:        0,
+				LastSeenAt:   createdAt,
+			}}
+		}
+		current.cluster.Count++
+		if createdAt.After(current.cluster.LastSeenAt) {
+			current.cluster.LastSeenAt = createdAt
+		}
+		if current.cluster.SampleIntent == "" { // replace a blank sample with this row's trimmed intent
+			current.cluster.SampleIntent = strings.TrimSpace(intent)
+		}
+		groups[k] = current // map values are copies, so the updated aggregate is stored back
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+
+	out := make([]CapabilityGapCluster, 0, len(groups))
+	for _, v := range groups {
+		out = append(out, v.cluster)
+	}
+	sort.Slice(out, func(i, j int) bool { // highest Count first; equal counts ordered by most recent LastSeenAt
+		if out[i].Count == out[j].Count {
+			return out[i].LastSeenAt.After(out[j].LastSeenAt)
+		}
+		return out[i].Count > out[j].Count
+	})
+	if len(out) > limit {
+		out = out[:limit]
+	}
+	return out, nil
+}
+
+// normalizeIntentKey reduces intent to a grouping key: trimmed, lower-cased and
+// stripped of everything except a-z, 0-9 and CJK ideographs (U+4E00..U+9FFF), so
+// spacing and punctuation variants of the same request share one key.
+// The key is capped at 80 runes; the literal "empty" is returned when no
+// character survives the filter.
+func normalizeIntentKey(intent string) string {
+	const maxRunes = 80
+	keep := func(r rune) bool {
+		return (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || (r >= 0x4e00 && r <= 0x9fff)
+	}
+
+	kept := make([]rune, 0, len(intent))
+	for _, r := range strings.ToLower(strings.TrimSpace(intent)) {
+		if keep(r) {
+			kept = append(kept, r)
+		}
+	}
+	switch {
+	case len(kept) == 0:
+		return "empty"
+	case len(kept) > maxRunes:
+		kept = kept[:maxRunes]
+	}
+	return string(kept)
+}
+
 func (s *SQLiteStore) migrate() error {
 	stmt := `
 	CREATE TABLE IF NOT EXISTS messages (
@@ -121,6 +271,15 @@ func (s *SQLiteStore) migrate() error {
 		created_at TIMESTAMP NOT NULL
 	);
 	CREATE INDEX IF NOT EXISTS idx_messages_chat_id_id ON messages(chat_id, id);
+	CREATE TABLE IF NOT EXISTS capability_gaps (
+		id INTEGER PRIMARY KEY AUTOINCREMENT,
+		chat_id TEXT NOT NULL,
+		user_id TEXT NOT NULL,
+		intent TEXT NOT NULL,
+		reason TEXT NOT NULL,
+		created_at TIMESTAMP NOT NULL
+	);
+	CREATE INDEX IF NOT EXISTS idx_capability_gaps_created_at ON capability_gaps(created_at);
 	`
 	if _, err := s.db.Exec(stmt); err != nil {
 		return fmt.Errorf("migrate schema: %w", err)
diff --git a/internal/memory/store_sqlite_test.go b/internal/memory/store_sqlite_test.go
new file mode 100644
index 0000000..cdbedc7
--- /dev/null
+++ b/internal/memory/store_sqlite_test.go
@@ -0,0 +1,64 @@
+package memory
+
+import (
+	"path/filepath"
+	"testing"
+	"time"
+)
+
+func TestCapabilityGapStoreAndLoad(t *testing.T) { // saves two gaps and expects both back, newest first
+	dbPath := filepath.Join(t.TempDir(), "test.db")
+	store, err := NewSQLiteStore(dbPath, nil)
+	if err != nil {
+		t.Fatalf("NewSQLiteStore error: %v", err)
+	}
+	defer store.Close()
+
+	if err := store.SaveCapabilityGap("c1", "u1", "intent-a", "reason-a"); err != nil {
+		t.Fatalf("SaveCapabilityGap error: %v", err)
+	}
+	if err := store.SaveCapabilityGap("c1", "u1", "intent-b", "reason-b"); err != nil {
+		t.Fatalf("SaveCapabilityGap error: %v", err)
+	}
+
+	items, err := store.TopCapabilityGaps(10)
+	if err != nil {
+		t.Fatalf("TopCapabilityGaps error: %v", err)
+	}
+	if len(items) != 2 {
+		t.Fatalf("expected 2 items, got %d", len(items))
+	}
+	if items[0].Intent != "intent-b" { // the row inserted last must come first
+		t.Fatalf("expected newest first, got first intent=%s", items[0].Intent)
+	}
+}
+
+func TestTopCapabilityGapClusters(t *testing.T) { // intents differing only in spacing must fall into one cluster
+	dbPath := filepath.Join(t.TempDir(), "cluster.db")
+	store, err := NewSQLiteStore(dbPath, nil)
+	if err != nil {
+		t.Fatalf("NewSQLiteStore error: %v", err)
+	}
+	defer store.Close()
+
+	if err := store.SaveCapabilityGap("c1", "u1", "帮我查询 data 目录", "no_skill_matched"); err != nil {
+		t.Fatalf("SaveCapabilityGap error: %v", err)
+	}
+	if err := store.SaveCapabilityGap("c1", "u2", "帮我 查询 data 目录", "no_skill_matched"); err != nil { // same intent with an extra space
+		t.Fatalf("SaveCapabilityGap error: %v", err)
+	}
+	if err := store.SaveCapabilityGap("c2", "u3", "读取配置文件内容", "tool_call_failed:file"); err != nil { // unrelated intent and reason
+		t.Fatalf("SaveCapabilityGap error: %v", err)
+	}
+
+	clusters, err := store.TopCapabilityGapClusters(10, time.Now().UTC().Add(-1*time.Hour))
+	if err != nil {
+		t.Fatalf("TopCapabilityGapClusters error: %v", err)
+	}
+	if len(clusters) == 0 {
+		t.Fatalf("expected non-empty clusters")
+	}
+	if clusters[0].Count < 2 { // the two spacing variants should lead the ranking
+		t.Fatalf("expected first cluster count >= 2, got %d", clusters[0].Count)
+	}
+}
diff --git a/internal/memory/types.go b/internal/memory/types.go
new file mode 100644
index 0000000..f0c62d6
--- /dev/null
+++ b/internal/memory/types.go
@@ -0,0 +1,20 @@
+package memory
+
+import "time"
+
+type CapabilityGap struct { // one stored capability-gap record, as read back by TopCapabilityGaps
+	ID        int64
+	ChatID    string
+	UserID    string
+	Intent    string
+	Reason    string
+	CreatedAt time.Time
+}
+
+type CapabilityGapCluster struct { // a group of capability gaps sharing a normalized intent and a reason
+	IntentKey    string    // normalized intent used as the grouping key
+	SampleIntent string    // one original (trimmed) intent from the group
+	Reason       string    // trimmed reason shared by the group
+	Count        int       // number of records in the group
+	LastSeenAt   time.Time // latest created_at among the group's records
+}
diff --git a/internal/runtimews/bootstrap.go b/internal/runtimews/bootstrap.go
new file mode 100644
index 0000000..cc1b965
--- /dev/null
+++ b/internal/runtimews/bootstrap.go
@@ -0,0 +1,109 @@
+package runtimews
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+const envWorkspaceDir = "AGENT_WORKSPACE_DIR" // environment variable that names the runtime workspace directory
+
+func PrepareFromEnv() (string, error) { // creates and seeds the runtime workspace, exports its location, and returns its absolute path
+	workspaceDir := strings.TrimSpace(os.Getenv(envWorkspaceDir))
+	if workspaceDir == "" {
+		workspaceDir = filepath.Join(".", "workspace", "agent_runtime") // default, relative to the current directory
+	}
+	absWorkspace, err := filepath.Abs(workspaceDir)
+	if err != nil {
+		return "", err
+	}
+	if err := os.MkdirAll(absWorkspace, 0o755); err != nil {
+		return "", err
+	}
+
+	if err := seedRuntimeWorkspace(absWorkspace); err != nil {
+		return "", err
+	}
+
+	if err := os.Setenv(envWorkspaceDir, absWorkspace); err != nil { // publish the absolute path for later readers of the variable
+		return "", err
+	}
+	_ = os.Setenv("CONFIG_ENV_FILE", filepath.Join(absWorkspace, "configs", "env")) // NOTE(review): replaces any CONFIG_ENV_FILE already set, and the Setenv error is dropped
+	return absWorkspace, nil
+}
+
+// seedRuntimeWorkspace populates workspaceRoot from the current directory: the
+// configs, data, bot_context and skills trees are copied in via copyDirIfMissing
+// (files already present in the workspace are kept), and an empty
+// workspaceRoot/workspace directory is ensured.
+func seedRuntimeWorkspace(workspaceRoot string) error {
+	for _, name := range []string{"configs", "data", "bot_context", "skills"} {
+		from, to := filepath.Join(".", name), filepath.Join(workspaceRoot, name)
+		if err := copyDirIfMissing(from, to); err != nil {
+			return fmt.Errorf("seed %s failed: %w", name, err)
+		}
+	}
+
+	return os.MkdirAll(filepath.Join(workspaceRoot, "workspace"), 0o755)
+}
+
+// copyDirIfMissing mirrors the directory tree at src into dst without overwriting:
+// directories are created as needed and a file is copied only when os.Stat cannot
+// find it at the destination. A src that is missing or not a directory is a no-op.
+func copyDirIfMissing(src, dst string) error {
+	info, err := os.Stat(src)
+	switch {
+	case os.IsNotExist(err):
+		return nil
+	case err != nil:
+		return err
+	case !info.IsDir():
+		return nil
+	}
+	if err := os.MkdirAll(dst, 0o755); err != nil {
+		return err
+	}
+
+	visit := func(path string, entry os.DirEntry, walkErr error) error {
+		if walkErr != nil {
+			return walkErr
+		}
+		rel, err := filepath.Rel(src, path)
+		if err != nil {
+			return err
+		}
+		target := filepath.Join(dst, rel)
+		if entry.IsDir() {
+			return os.MkdirAll(target, 0o755)
+		}
+		if _, statErr := os.Stat(target); statErr == nil {
+			return nil // already present in dst; keep it
+		}
+		return copyFile(path, target)
+	}
+	return filepath.WalkDir(src, visit)
+}
+
+func copyFile(src, dst string) error { // copies the bytes of src into a newly created dst, creating dst's parent directory first
+	in, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	defer in.Close()
+
+	if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil {
+		return err
+	}
+	out, err := os.Create(dst) // os.Create truncates an existing file; the source file's mode is not carried over
+	if err != nil {
+		return err
+	}
+	defer out.Close() // the Close error is not checked; Sync below reports write-back failures
+
+	if _, err := io.Copy(out, in); err != nil {
+		return err // NOTE(review): a partially written dst is left in place on failure
+	}
+	return out.Sync()
+}
diff --git a/internal/toolhost/client.go b/internal/toolhost/client.go
new file mode 100644
index 0000000..9f71b71
--- /dev/null
+++ b/internal/toolhost/client.go
@@ -0,0 +1,303 @@
+package toolhost
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"laodingbot/internal/logger"
+)
+
+type ClientConfig struct { // settings for the toolhost child process and the RPC client around it
+	ExecutablePath    string        // program to launch; NewClient rejects ""
+	Args              []string      // arguments passed to the program
+	WorkDir           string        // working directory of the child process
+	Env               []string      // extra variables appended to os.Environ() when non-empty
+	CallTimeout       time.Duration // per-call timeout; <= 0 becomes 15s in NewClient
+	HeartbeatInterval time.Duration // ping period; <= 0 becomes 5s in NewClient
+	MaxConcurrency    int           // capacity of the call semaphore; <= 0 becomes 4 in NewClient
+}
+
+type Client struct { // JSON-RPC client that owns one toolhost child process and talks to it over stdin/stdout
+	cfg ClientConfig   // configuration with defaults applied by NewClient
+	log *logger.Logger // optional; every use is guarded by a nil check
+
+	cmd     *exec.Cmd      // current child process; nil when none is running
+	stdin   io.WriteCloser // child's stdin pipe (request stream)
+	stdout  io.ReadCloser  // child's stdout pipe (response stream)
+	decoder *json.Decoder  // reads responses from stdout
+	encoder *json.Encoder  // writes requests to stdin
+
+	seq int64 // request ID counter, advanced atomically
+
+	lifecycleMu sync.Mutex    // guards cmd, the pipes and the codecs across start/stop/restart
+	ioMu        sync.Mutex    // serializes one request/response exchange at a time
+	sem         chan struct{} // bounds concurrent calls to cfg.MaxConcurrency
+
+	closed int32 // set to 1 by Close; accessed atomically
+}
+
+func NewClient(cfg ClientConfig, log *logger.Logger) (*Client, error) { // applies defaults, starts the toolhost process and the heartbeat goroutine
+	if cfg.ExecutablePath == "" {
+		return nil, fmt.Errorf("empty executable path")
+	}
+	if cfg.CallTimeout <= 0 {
+		cfg.CallTimeout = 15 * time.Second
+	}
+	if cfg.HeartbeatInterval <= 0 {
+		cfg.HeartbeatInterval = 5 * time.Second
+	}
+	if cfg.MaxConcurrency <= 0 {
+		cfg.MaxConcurrency = 4
+	}
+
+	c := &Client{
+		cfg: cfg,
+		log: log,
+		sem: make(chan struct{}, cfg.MaxConcurrency),
+	}
+	if err := c.ensureStartedLocked(); err != nil { // called without lifecycleMu: c is not yet shared with any other goroutine
+		return nil, err
+	}
+	go c.heartbeatLoop() // runs until it observes closed on a tick
+	return c, nil
+}
+
+func (c *Client) Close() error { // marks the client closed, then kills the child process
+	atomic.StoreInt32(&c.closed, 1) // set first so that call rejects new requests
+	c.lifecycleMu.Lock()
+	defer c.lifecycleMu.Unlock()
+	return c.stopLocked()
+}
+
+func (c *Client) ToolList(ctx context.Context) ([]toolInfo, error) { // issues the "tool.list" RPC and returns the decoded tool descriptions
+	var out []toolInfo
+	if err := c.call(ctx, "tool.list", map[string]string{}, &out); err != nil { // sends an empty params object
+		return nil, err
+	}
+	return out, nil
+}
+
+func (c *Client) ToolCall(ctx context.Context, name, input string) (string, error) { // runs tool name with input via the "tool.call" RPC
+	var out toolCallResult
+	if err := c.call(ctx, "tool.call", toolCallParams{Name: name, Input: input}, &out); err != nil {
+		return "", err // transport or RPC failure: no output is returned
+	}
+	if out.Error != "" {
+		return out.Output, fmt.Errorf("%s", out.Error) // tool-reported failure; the text is data, so '%' in it is not treated as a verb
+	}
+	return out.Output, nil
+}
+
+// call runs one RPC under the concurrency semaphore, restarting the toolhost and retrying once when an attempt fails.
+func (c *Client) call(ctx context.Context, method string, params interface{}, result interface{}) error {
+	if atomic.LoadInt32(&c.closed) == 1 {
+		return fmt.Errorf("toolhost client is closed")
+	}
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	select {
+	case c.sem <- struct{}{}:
+		defer func() { <-c.sem }()
+	case <-ctx.Done():
+		return ctx.Err()
+	}
+	var lastErr error
+	for attempt := 0; attempt < 2; attempt++ {
+		err := c.callOnce(ctx, method, params, result)
+		if err == nil {
+			return nil
+		}
+		// A closed client or a cancelled/expired caller ctx says nothing about toolhost health: do not restart it.
+		if atomic.LoadInt32(&c.closed) == 1 || ctx.Err() != nil {
+			return err
+		}
+		lastErr = err
+		if c.log != nil {
+			c.log.Warnf("toolhost rpc call failed method=%s attempt=%d err=%v", method, attempt+1, err)
+		}
+		if restartErr := c.restart(); restartErr != nil {
+			return fmt.Errorf("rpc failed=%v; restart failed=%w", err, restartErr)
+		}
+	}
+	return fmt.Errorf("toolhost rpc call failed after retry method=%s err=%v", method, lastErr)
+}
+
+// callOnce performs a single JSON-RPC request/response exchange on the child's stdin/stdout and decodes the result.
+// NOTE(review): callCtx is only checked before the I/O starts; a blocked Decode is not interrupted by the timeout.
+func (c *Client) callOnce(ctx context.Context, method string, params interface{}, result interface{}) error {
+	if err := c.ensureStarted(); err != nil {
+		return err
+	}
+	callCtx, cancel := context.WithTimeout(ctx, c.cfg.CallTimeout)
+	defer cancel()
+	if err := callCtx.Err(); err != nil {
+		return err
+	}
+	id := atomic.AddInt64(&c.seq, 1)
+	payload, err := json.Marshal(params)
+	if err != nil {
+		return err
+	}
+	req := rpcRequest{JSONRPC: "2.0", ID: id, Method: method, Params: payload}
+
+	c.ioMu.Lock()
+	defer c.ioMu.Unlock()
+	// Snapshot the codecs under lifecycleMu: the process reaper and restart set them to nil concurrently.
+	c.lifecycleMu.Lock()
+	enc, dec := c.encoder, c.decoder
+	c.lifecycleMu.Unlock()
+	if enc == nil || dec == nil {
+		return fmt.Errorf("toolhost process is not running")
+	}
+	if err := enc.Encode(req); err != nil {
+		return err
+	}
+	var resp rpcResponse
+	if err := dec.Decode(&resp); err != nil {
+		return err
+	}
+	if resp.ID != id {
+		return fmt.Errorf("rpc response id mismatch expected=%d got=%d", id, resp.ID)
+	}
+	if resp.Error != nil {
+		return fmt.Errorf("rpc error code=%d msg=%s", resp.Error.Code, resp.Error.Message)
+	}
+	if result == nil {
+		return nil
+	}
+	// resp.Result was decoded generically; re-encode it to fill the caller's typed result.
+	raw, err := json.Marshal(resp.Result)
+	if err != nil {
+		return err
+	}
+	return json.Unmarshal(raw, result)
+}
+
+func (c *Client) heartbeatLoop() { // pings the toolhost every HeartbeatInterval and restarts it when the ping fails
+	ticker := time.NewTicker(c.cfg.HeartbeatInterval)
+	defer ticker.Stop()
+
+	for range ticker.C {
+		if atomic.LoadInt32(&c.closed) == 1 { // closed is only checked on a tick, so the loop ends at the first tick after Close
+			return
+		}
+		hbCtx, cancel := context.WithTimeout(context.Background(), c.cfg.CallTimeout)
+		var out map[string]string
+		err := c.call(hbCtx, "ping", map[string]string{}, &out) // goes through the same semaphore and I/O lock as tool calls
+		cancel()
+		if err == nil {
+			continue
+		}
+		if c.log != nil {
+			c.log.Warnf("toolhost heartbeat failed err=%v", err)
+		}
+		_ = c.restart() // NOTE(review): call has already restarted on failure; this restart's error is discarded
+	}
+}
+
+func (c *Client) ensureStarted() error { // locking wrapper around ensureStartedLocked
+	c.lifecycleMu.Lock()
+	defer c.lifecycleMu.Unlock()
+	return c.ensureStartedLocked()
+}
+
+func (c *Client) ensureStartedLocked() error { // starts the child process unless one is already recorded; callers other than NewClient hold lifecycleMu
+	if c.cmd != nil && c.cmd.Process != nil {
+		return nil // a process is already recorded
+	}
+
+	cmd := exec.Command(c.cfg.ExecutablePath, c.cfg.Args...)
+	cmd.Dir = c.cfg.WorkDir
+	if len(c.cfg.Env) > 0 {
+		cmd.Env = append(os.Environ(), c.cfg.Env...) // parent environment plus the configured extras
+	}
+
+	stdin, err := cmd.StdinPipe()
+	if err != nil {
+		return err
+	}
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return err // NOTE(review): the stdin pipe created above is not closed on this path
+	}
+	stderr, err := cmd.StderrPipe()
+	if err != nil {
+		return err
+	}
+
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+
+	go c.logStderr(stderr) // forwards the child's stderr to the log
+	go func() { // reaper: waits for the child to exit, then forgets it
+		_ = cmd.Wait()
+		c.lifecycleMu.Lock()
+		if c.cmd == cmd { // only clear if this process is still the current one (no restart happened meanwhile)
+			c.cmd = nil
+			c.stdin = nil
+			c.stdout = nil
+			c.encoder = nil
+			c.decoder = nil
+		}
+		c.lifecycleMu.Unlock()
+	}()
+
+	c.cmd = cmd
+	c.stdin = stdin
+	c.stdout = stdout
+	c.encoder = json.NewEncoder(stdin)
+	c.decoder = json.NewDecoder(bufio.NewReader(stdout))
+
+	if c.log != nil {
+		c.log.Infof("toolhost started pid=%d", cmd.Process.Pid)
+	}
+	return nil
+}
+
+func (c *Client) restart() error { // stops the current child (best effort) and starts a new one, all under lifecycleMu
+	c.lifecycleMu.Lock()
+	defer c.lifecycleMu.Unlock()
+	if err := c.stopLocked(); err != nil { // a stop failure is logged, not returned; the start below is still attempted
+		if c.log != nil {
+			c.log.Warnf("toolhost stop during restart failed err=%v", err)
+		}
+	}
+	return c.ensureStartedLocked()
+}
+
+// stopLocked kills the child process, if any, and forgets its pipes and codecs. The caller holds lifecycleMu.
+func (c *Client) stopLocked() error {
+	if c.cmd == nil || c.cmd.Process == nil {
+		return nil
+	}
+	// Clear the handles even when Kill fails (typically because the process has already
+	// exited); otherwise ensureStartedLocked sees a live-looking cmd and skips the respawn.
+	err := c.cmd.Process.Kill()
+	c.cmd = nil
+	c.stdin = nil
+	c.stdout = nil
+	c.encoder = nil
+	c.decoder = nil
+	return err
+}
+
+// logStderr forwards the child's stderr to the log line by line (when a logger is set) and always drains r to EOF.
+func (c *Client) logStderr(r io.Reader) {
+	if c.log != nil {
+		s := bufio.NewScanner(r)
+		for s.Scan() {
+			c.log.Warnf("toolhost stderr: %s", s.Text())
+		}
+	}
+	_, _ = io.Copy(io.Discard, r) // best-effort drain: if the scanner stopped early (e.g. over-long line) the child must not block on stderr
+}
diff --git a/internal/toolhost/protocol.go b/internal/toolhost/protocol.go
new file mode 100644
index 0000000..ca4cb05
--- /dev/null
+++ b/internal/toolhost/protocol.go
@@ -0,0 +1,37 @@
+package toolhost
+
+import "encoding/json"
+
+type rpcRequest struct { // one request frame, decoded from the stream by Server.Serve
+	JSONRPC string          `json:"jsonrpc"`          // protocol version tag; not inspected by handleRequest
+	ID      int64           `json:"id"`               // copied into the matching rpcResponse
+	Method  string          `json:"method"`           // "ping", "tool.list" or "tool.call"
+	Params  json.RawMessage `json:"params,omitempty"` // left raw; tool.call unmarshals it into toolCallParams
+}
+
+type rpcResponse struct { // one reply frame, encoded by Server.writeResponse
+	JSONRPC string      `json:"jsonrpc"`          // handleRequest always sets "2.0"
+	ID      int64       `json:"id"`               // echoes rpcRequest.ID
+	Result  interface{} `json:"result,omitempty"` // ping status map, []toolInfo or toolCallResult
+	Error   *rpcError   `json:"error,omitempty"`  // set instead of Result for protocol-level failures
+}
+
+type rpcError struct { // protocol-level failure reported in rpcResponse.Error
+	Code    int    `json:"code"`    // handleRequest uses -32602, -32004 and -32601
+	Message string `json:"message"` // fixed text such as "tool not found"
+}
+
+type toolInfo struct { // one entry of the "tool.list" result
+	Name        string `json:"name"`        // value of the registered tool's Name()
+	Description string `json:"description"` // value of the registered tool's Description()
+}
+
+type toolCallParams struct { // params payload of a "tool.call" request
+	Name  string `json:"name"`  // handleRequest trims and lower-cases it before the registry lookup
+	Input string `json:"input"` // passed unchanged to the tool's Call
+}
+
+type toolCallResult struct { // result payload of a "tool.call" request
+	Output string `json:"output"`          // string returned by the tool, kept even when the call also failed
+	Error  string `json:"error,omitempty"` // err.Error() of a failed tool call; empty on success
+}
diff --git a/internal/toolhost/remote_tool.go b/internal/toolhost/remote_tool.go
new file mode 100644
index 0000000..8b2242c
--- /dev/null
+++ b/internal/toolhost/remote_tool.go
@@ -0,0 +1,36 @@
+package toolhost
+
+import (
+	"context"
+	"time"
+)
+
+type RemoteTool struct { // proxy whose Call is forwarded through a toolhost Client
+	name        string        // tool name sent with every ToolCall
+	description string        // text returned by Description
+	client      *Client       // used by Call to reach the toolhost
+	callTimeout time.Duration // applied in Call only when ctx has no deadline
+}
+
+func NewRemoteTool(name, description string, callTimeout time.Duration, client *Client) *RemoteTool { // wraps one remote tool reachable via client
+	if callTimeout <= 0 {
+		callTimeout = 15 * time.Second // non-positive values fall back to a 15s default
+	}
+	return &RemoteTool{name: name, description: description, client: client, callTimeout: callTimeout}
+}
+
+func (t *RemoteTool) Name() string { return t.name } // Name returns the tool name given to NewRemoteTool
+
+func (t *RemoteTool) Description() string { return t.description } // Description returns the text given to NewRemoteTool
+
+func (t *RemoteTool) Call(ctx context.Context, input string) (string, error) {
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	if _, ok := ctx.Deadline(); !ok {
+		var cancel context.CancelFunc
+		ctx, cancel = context.WithTimeout(ctx, t.callTimeout)
+		defer cancel()
+	}
+	return t.client.ToolCall(ctx, t.name, input)
+}
diff --git a/internal/toolhost/runtime.go b/internal/toolhost/runtime.go
new file mode 100644
index 0000000..efe148e
--- /dev/null
+++ b/internal/toolhost/runtime.go
@@ -0,0 +1,45 @@
+package toolhost
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"laodingbot/internal/config"
+	"laodingbot/internal/logger"
+	"laodingbot/internal/tools"
+	"laodingbot/internal/tools/filetool"
+	"laodingbot/internal/tools/shelltool"
+)
+
+func RunChild(ctx context.Context, cfg config.Config, log *logger.Logger) error {
+	var registryLog *logger.Logger
+	var fileLog *logger.Logger
+	var shellLog *logger.Logger
+	var serverLog *logger.Logger
+	if log != nil {
+		log.Infof("toolhost child starting")
+		registryLog = log.WithComponent("toolhost.registry")
+		fileLog = log.WithComponent("toolhost.file")
+		shellLog = log.WithComponent("toolhost.shell")
+		serverLog = log.WithComponent("toolhost.server")
+	}
+	registry := tools.NewRegistry(registryLog)
+	registry.Register(filetool.New(cfg.Security.AllowedDirs, cfg.ToolOutputMaxChars, fileLog))
+	registry.Register(shelltool.New(
+		cfg.Security.AllowedCommands,
+		cfg.Security.WorkDir,
+		time.Duration(cfg.ToolCallTimeoutSec)*time.Second,
+		cfg.ToolOutputMaxChars,
+		shellLog,
+	))
+
+	server := NewServer(registry, serverLog)
+	if err := server.Serve(ctx, stdin(), stdout()); err != nil && ctx.Err() == nil {
+		return fmt.Errorf("toolhost serve failed: %w", err)
+	}
+	if log != nil {
+		log.Infof("toolhost child stopped")
+	}
+	return nil
+}
diff --git a/internal/toolhost/server.go b/internal/toolhost/server.go
new file mode 100644
index 0000000..a2114f0
--- /dev/null
+++ b/internal/toolhost/server.go
@@ -0,0 +1,106 @@
+package toolhost
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"errors"
+	"io"
+	"sort"
+	"strings"
+	"sync"
+
+	"laodingbot/internal/logger"
+	"laodingbot/internal/tools"
+)
+
+type Server struct { // serves toolhost requests against a tool registry
+	registry *tools.Registry // tools exposed through tool.list and tool.call
+	log      *logger.Logger  // optional; every use is guarded by a nil check
+
+	writeMu sync.Mutex // held by writeResponse while a response is encoded
+}
+
+func NewServer(registry *tools.Registry, log *logger.Logger) *Server { // log may be nil; its uses are nil-guarded
+	return &Server{registry: registry, log: log}
+}
+
+// Serve answers requests from reader one at a time, writing each response to writer. It
+// returns nil on EOF, ctx.Err() once ctx is done, or the first decode/write error.
+func (s *Server) Serve(ctx context.Context, reader io.Reader, writer io.Writer) error {
+	dec := json.NewDecoder(bufio.NewReader(reader))
+	for {
+		// Cancellation is only observed between requests; a blocked Decode is not interrupted.
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		var req rpcRequest
+		err := dec.Decode(&req)
+		if errors.Is(err, io.EOF) {
+			return nil
+		}
+		if err != nil {
+			if s.log != nil {
+				s.log.Errorf("toolhost decode request failed err=%v", err)
+			}
+			return err
+		}
+		if err := s.writeResponse(writer, s.handleRequest(ctx, req)); err != nil {
+			if s.log != nil {
+				s.log.Errorf("toolhost write response failed err=%v", err)
+			}
+			return err
+		}
+	}
+}
+
+// handleRequest maps one request to its response (always JSONRPC "2.0" and the request
+// ID). A failing tool is reported inside the tool.call result, not as an rpcError.
+func (s *Server) handleRequest(ctx context.Context, req rpcRequest) rpcResponse {
+	resp := rpcResponse{JSONRPC: "2.0", ID: req.ID}
+	fail := func(code int, msg string) rpcResponse {
+		resp.Error = &rpcError{Code: code, Message: msg}
+		return resp
+	}
+
+	switch req.Method {
+	case "ping":
+		resp.Result = map[string]string{"status": "ok"}
+	case "tool.list":
+		registered := s.registry.List()
+		sort.Slice(registered, func(a, b int) bool {
+			return strings.ToLower(registered[a].Name()) < strings.ToLower(registered[b].Name())
+		})
+		infos := make([]toolInfo, 0, len(registered))
+		for _, tool := range registered {
+			infos = append(infos, toolInfo{Name: tool.Name(), Description: tool.Description()})
+		}
+		resp.Result = infos
+	case "tool.call":
+		var params toolCallParams
+		if err := json.Unmarshal(req.Params, &params); err != nil {
+			return fail(-32602, "invalid params")
+		}
+		tool, found := s.registry.Get(strings.TrimSpace(strings.ToLower(params.Name)))
+		if !found {
+			return fail(-32004, "tool not found")
+		}
+		output, callErr := tool.Call(ctx, params.Input)
+		result := toolCallResult{Output: output}
+		if callErr != nil {
+			result.Error = callErr.Error()
+		}
+		resp.Result = result
+	default:
+		return fail(-32601, "method not found")
+	}
+	return resp
+}
+
+// writeResponse encodes resp as one JSON value on writer while holding writeMu.
+func (s *Server) writeResponse(writer io.Writer, resp rpcResponse) error {
+	s.writeMu.Lock()
+	defer s.writeMu.Unlock()
+	return json.NewEncoder(writer).Encode(resp)
+}
diff --git a/internal/toolhost/stdio.go b/internal/toolhost/stdio.go
new file mode 100644
index 0000000..022b41a
--- /dev/null
+++ b/internal/toolhost/stdio.go
@@ -0,0 +1,14 @@
+package toolhost
+
+import (
+	"io"
+	"os"
+)
+
+func stdin() io.Reader { // request stream handed to Serve by RunChild: the process's standard input
+	return os.Stdin
+}
+
+func stdout() io.Writer { // response stream handed to Serve by RunChild: the process's standard output
+	return os.Stdout
+}
diff --git a/internal/tools/filetool/filetool.go b/internal/tools/filetool/filetool.go
index 71c76ce..0ce33dc 100644
--- a/internal/tools/filetool/filetool.go
+++ b/internal/tools/filetool/filetool.go
@@ -12,10 +12,11 @@ import (
 
 type Tool struct {
 	allowedDirs []string
+	maxOutputChars int
 	log         *logger.Logger
 }
 
-func New(allowedDirs []string, log *logger.Logger) *Tool {
+func New(allowedDirs []string, maxOutputChars int, log *logger.Logger) *Tool {
 	normalized := make([]string, 0, len(allowedDirs))
 	for _, dir := range allowedDirs {
 		abs, err := filepath.Abs(strings.TrimSpace(dir))
@@ -23,16 +24,19 @@ func New(allowedDirs []string, log *logger.Logger) *Tool {
 			normalized = append(normalized, filepath.Clean(abs))
 		}
 	}
-	if log != nil {
-		log.Infof("file tool initialized allowed_dirs=%d", len(normalized))
+	if maxOutputChars <= 0 {
+		maxOutputChars = 4000
 	}
-	return &Tool{allowedDirs: normalized, log: log}
+	if log != nil {
+		log.Infof("file tool initialized allowed_dirs=%d max_output_chars=%d", len(normalized), maxOutputChars)
+	}
+	return &Tool{allowedDirs: normalized, maxOutputChars: maxOutputChars, log: log}
 }
 
 func (t *Tool) Name() string { return "file" }
 
 func (t *Tool) Description() string {
-	return "File operations with command format: read <path> | write <path>\\n<content>"
+	return "File operations with command format: read <path> | list <path> | write <path>\\n<content>"
 }
 
 func (t *Tool) Call(_ context.Context, input string) (string, error) {
@@ -49,6 +53,16 @@ func (t *Tool) Call(_ context.Context, input string) (string, error) {
 			}
 			return "", err
 		}
+		info, err := os.Stat(resolved)
+		if err != nil {
+			if t.log != nil {
+				t.log.Errorf("file read stat failed path=%s err=%v", resolved, err)
+			}
+			return "", err
+		}
+		if info.IsDir() {
+			return "", fmt.Errorf("PATH_IS_DIRECTORY: %s (use 'list <path>' first)", resolved)
+		}
 		b, err := os.ReadFile(resolved)
 		if err != nil {
 			if t.log != nil {
@@ -59,7 +73,49 @@ func (t *Tool) Call(_ context.Context, input string) (string, error) {
 		if t.log != nil {
 			t.log.Infof("file read success path=%s bytes=%d", resolved, len(b))
 		}
-		return string(b), nil
+		out := string(b)
+		if len(out) > t.maxOutputChars {
+			out = out[:t.maxOutputChars]
+		}
+		return out, nil
+	}
+
+	if strings.HasPrefix(input, "list ") {
+		path := strings.TrimSpace(strings.TrimPrefix(input, "list "))
+		resolved, err := t.resolveAllowed(path)
+		if err != nil {
+			if t.log != nil {
+				t.log.Warnf("file list denied path=%s err=%v", path, err)
+			}
+			return "", err
+		}
+		entries, err := os.ReadDir(resolved)
+		if err != nil {
+			if t.log != nil {
+				t.log.Errorf("file list failed path=%s err=%v", resolved, err)
+			}
+			return "", err
+		}
+		b := strings.Builder{}
+		for _, e := range entries {
+			name := e.Name()
+			if e.IsDir() {
+				name += "/"
+			}
+			b.WriteString(name)
+			b.WriteString("\n")
+			if b.Len() >= t.maxOutputChars {
+				break
+			}
+		}
+		out := strings.TrimSpace(b.String())
+		if out == "" {
+			return "(empty)", nil
+		}
+		if len(out) > t.maxOutputChars {
+			out = out[:t.maxOutputChars]
+		}
+		return out, nil
 	}
 
 	if strings.HasPrefix(input, "write ") {
@@ -97,9 +153,18 @@ func (t *Tool) Call(_ context.Context, input string) (string, error) {
 }
 
 func (t *Tool) resolveAllowed(path string) (string, error) {
-	abs, err := filepath.Abs(path)
-	if err != nil {
-		return "", err
+	base := strings.TrimSpace(os.Getenv("AGENT_WORKSPACE_DIR"))
+	var abs string
+	var err error
+	if filepath.IsAbs(path) {
+		abs = path
+	} else if base != "" {
+		abs = filepath.Join(base, path)
+	} else {
+		abs, err = filepath.Abs(path)
+		if err != nil {
+			return "", err
+		}
 	}
 	abs = filepath.Clean(abs)
 	for _, allowed := range t.allowedDirs {
diff --git a/internal/tools/filetool/filetool_test.go b/internal/tools/filetool/filetool_test.go
new file mode 100644
index 0000000..c9e736d
--- /dev/null
+++ b/internal/tools/filetool/filetool_test.go
@@ -0,0 +1,66 @@
+package filetool
+
+import (
+	"context"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestReadDeniedOutsideAllowedDir(t *testing.T) {
+	sandbox := t.TempDir()
+	fileTool := New([]string{sandbox}, 4000, nil)
+	// The relative path resolves against the workspace or cwd, never into the fresh temp dir.
+	if _, err := fileTool.Call(context.Background(), "read ../outside.txt"); err == nil {
+		t.Fatal("expected path denied error")
+	}
+}
+
+func TestWriteAndReadInsideAllowedDir(t *testing.T) {
+	ctx := context.Background()
+	sandbox := t.TempDir()
+	fileTool := New([]string{sandbox}, 4000, nil)
+	target := filepath.Join(sandbox, "a.txt")
+	// Round trip: content written through the tool must come back unchanged.
+	if _, err := fileTool.Call(ctx, "write "+target+"\nhello"); err != nil {
+		t.Fatalf("write error: %v", err)
+	}
+	got, err := fileTool.Call(ctx, "read "+target)
+	if err != nil {
+		t.Fatalf("read error: %v", err)
+	}
+	if got != "hello" {
+		t.Fatalf("unexpected read output: %q", got)
+	}
+}
+
+func TestReadDirectoryReturnsStructuredError(t *testing.T) {
+	sandbox := t.TempDir()
+	fileTool := New([]string{sandbox}, 4000, nil)
+	// Reading a directory must fail with the PATH_IS_DIRECTORY marker.
+	_, err := fileTool.Call(context.Background(), "read "+sandbox)
+	switch {
+	case err == nil:
+		t.Fatal("expected directory read error")
+	case !strings.Contains(err.Error(), "PATH_IS_DIRECTORY"):
+		t.Fatalf("expected PATH_IS_DIRECTORY, got: %v", err)
+	}
+}
+
+func TestListDirectory(t *testing.T) {
+	ctx := context.Background()
+	sandbox := t.TempDir()
+	fileTool := New([]string{sandbox}, 4000, nil)
+
+	// Create one file through the tool, then expect its name in the listing.
+	if _, err := fileTool.Call(ctx, "write "+filepath.Join(sandbox, "x.txt")+"\nhello"); err != nil {
+		t.Fatalf("write error: %v", err)
+	}
+	listing, err := fileTool.Call(ctx, "list "+sandbox)
+	if err != nil {
+		t.Fatalf("list error: %v", err)
+	}
+	if !strings.Contains(listing, "x.txt") {
+		t.Fatalf("expected x.txt in list output, got: %q", listing)
+	}
+}
diff --git a/internal/tools/shelltool/shelltool.go b/internal/tools/shelltool/shelltool.go
index 5b5a0d6..9a0a07d 100644
--- a/internal/tools/shelltool/shelltool.go
+++ b/internal/tools/shelltool/shelltool.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"os/exec"
 	"path/filepath"
+	"runtime"
 	"strings"
 	"time"
 
@@ -15,10 +16,11 @@ type Tool struct {
 	allowedCommands map[string]struct{}
 	workDir         string
 	timeout         time.Duration
+	maxOutputChars  int
 	log             *logger.Logger
 }
 
-func New(allowed []string, workDir string, timeout time.Duration, log *logger.Logger) *Tool {
+func New(allowed []string, workDir string, timeout time.Duration, maxOutputChars int, log *logger.Logger) *Tool {
 	set := make(map[string]struct{}, len(allowed))
 	for _, c := range allowed {
 		cmd := strings.TrimSpace(c)
@@ -33,10 +35,13 @@ func New(allowed []string, workDir string, timeout time.Duration, log *logger.Lo
 	if timeout <= 0 {
 		timeout = 15 * time.Second
 	}
-	if log != nil {
-		log.Infof("shell tool initialized allowed_commands=%d work_dir=%s timeout=%s", len(set), absDir, timeout)
+	if maxOutputChars <= 0 {
+		maxOutputChars = 4000
 	}
-	return &Tool{allowedCommands: set, workDir: absDir, timeout: timeout, log: log}
+	if log != nil {
+		log.Infof("shell tool initialized allowed_commands=%d work_dir=%s timeout=%s max_output_chars=%d", len(set), absDir, timeout, maxOutputChars)
+	}
+	return &Tool{allowedCommands: set, workDir: absDir, timeout: timeout, maxOutputChars: maxOutputChars, log: log}
 }
 
 func (t *Tool) Name() string { return "shell" }
@@ -72,14 +77,21 @@ func (t *Tool) Call(ctx context.Context, input string) (string, error) {
 	cmd := exec.CommandContext(runCtx, base, parts[1:]...)
 	cmd.Dir = t.workDir
 	out, err := cmd.CombinedOutput()
+	outText := string(out)
+	if len(outText) > t.maxOutputChars {
+		outText = outText[:t.maxOutputChars]
+	}
 	if err != nil {
 		if t.log != nil {
-			t.log.Errorf("shell command failed command=%s full_command=%q err=%v output_bytes=%d output=%q", base, trimmed, err, len(out), string(out))
+			t.log.Errorf("shell command failed command=%s full_command=%q err=%v output_bytes=%d output=%q", base, trimmed, err, len(out), outText)
 		}
-		return string(out), err
+		if runtime.GOOS == "windows" && strings.Contains(strings.ToLower(err.Error()), "executable file not found") {
+			return outText, fmt.Errorf("command not executable in current windows environment: %s", base)
+		}
+		return outText, err
 	}
 	if t.log != nil {
-		t.log.Infof("shell command success command=%s full_command=%q output_bytes=%d output=%q", base, trimmed, len(out), string(out))
+		t.log.Infof("shell command success command=%s full_command=%q output_bytes=%d output=%q", base, trimmed, len(out), outText)
 	}
-	return string(out), nil
+	return outText, nil
 }
diff --git a/internal/tools/shelltool/shelltool_test.go b/internal/tools/shelltool/shelltool_test.go
new file mode 100644
index 0000000..22882b1
--- /dev/null
+++ b/internal/tools/shelltool/shelltool_test.go
@@ -0,0 +1,23 @@
+package shelltool
+
+import (
+	"context"
+	"testing"
+	"time"
+)
+
+func TestCallRejectsEmptyCommand(t *testing.T) {
+	shell := New([]string{"echo"}, ".", time.Second, 4000, nil)
+	// Whitespace-only input carries no command and must be refused.
+	if _, err := shell.Call(context.Background(), "   "); err == nil {
+		t.Fatal("expected error for empty command")
+	}
+}
+
+func TestCallRejectsNonAllowlistedCommand(t *testing.T) {
+	shell := New([]string{"echo"}, ".", time.Second, 4000, nil)
+	// Only echo is allowlisted here, so cat must be refused.
+	if _, err := shell.Call(context.Background(), "cat test.txt"); err == nil {
+		t.Fatal("expected allowlist rejection")
+	}
+}
diff --git a/skills/filesystem_query/skill.md b/skills/filesystem_query/skill.md
index cd3eb12..76f278d 100644
--- a/skills/filesystem_query/skill.md
+++ b/skills/filesystem_query/skill.md
@@ -1,19 +1,122 @@
-# Skill: Filesystem Query
+---
+name: 文件系统查询专家
+description: 查询文件系统中的文件和目录信息，支持基本的路径解析和权限检查。
+---
 
-用途：
-- 查询目录下文件
-- 检查路径是否存在
-- 快速列出文件树
+# Skill: 文件系统查询专家
 
-建议工具：
-- `shell`
+## 1. 适用范围
+当用户提出以下诉求时触发本技能：
+- 查询某个目录中有什么文件/子目录
+- 查找特定名称或后缀的文件（如 `.md`、`.log`）
+- 检查文件或目录是否存在
+- 读取指定文件内容并给出摘要
+- 比较两个路径下的文件差异（仅基础层面）
+
+不适用场景：
+- 需要修改系统配置、安装软件、执行高风险命令
+- 需要递归扫描超大目录并输出完整明细（应先收敛范围）
+
+## 2. 可用工具与约束
+优先工具：
+- `shell`：用于目录枚举、检索、路径探测
+- `file`：用于读取文件内容（`file read <path>`）以及列出目录条目（`file list <path>`）
+
+必须遵守：
+- 仅在 `ALLOWED_DIRS` 白名单路径范围内操作
+- 仅使用 `ALLOWED_COMMANDS` 中允许的命令
+- 先做最小探测，再逐步扩大范围，避免一次性重命令
+- 结果必须基于工具真实输出，禁止臆测
+
+## 3. 执行策略（ReAct 指南）
+1. 明确用户目标：目录列表 / 文件查找 / 内容读取 / 存在性检查。
+2. 抽取目标路径：若用户未给路径，默认从 `WORK_DIR` 开始，并向用户说明。
+3. 先进行低成本探测：
+	- 目录查询可先列顶层
+	- 文件查询可先限定后缀与最大层级
+4. 若输出过长：
+	- 先返回摘要（数量、关键文件、下一步建议）
+	- 再询问是否继续细化
+5. 若命令失败：
+	- 返回错误原因（路径不存在、权限不足、命令受限等）
+	- 提供最短修复建议（改路径、缩范围、提供更具体条件）
+
+## 4. 推荐动作模板
+### 4.1 列出目录内容
+- 目标：查看目录下文件与子目录
+- 行为：先顶层，再按需递归
+
+### 4.2 检索特定文件
+- 目标：查找匹配名称/后缀的文件
+- 行为：限制目录范围与深度，优先返回命中摘要
+
+### 4.3 读取文件内容
+- 目标：读取并总结关键信息
+- 行为：先确认文件存在，再使用 `file` 工具读取
+
+### 4.4 路径存在性检查
+- 目标：确认路径是否存在、类型是文件还是目录
+- 行为：给出明确判断与后续可执行动作
+
+## 5. 输出规范
+回复格式建议：
+1. 结论：一句话说明查到了什么。
+2. 依据：列出关键路径/文件名/数量。
+3. 风险与限制：说明白名单、权限、输出截断等限制。
+4. 下一步：给用户 1-2 个可选动作。
+
+示例结构：
+- 结论：已在 `<目标目录>` 找到 `<N>` 个匹配文件。
+- 关键结果：`<文件1>`、`<文件2>`、`<文件3>`。
+- 限制说明：当前仅扫描到 `<层级/范围>`，如需可继续深度扫描。
+- 下一步建议：是否按时间排序、按后缀分组或读取某个文件内容。
+
+## 6. 失败回退策略
+- 路径不合法或越界：明确提示“路径不在允许范围内”。
+- 命令不可用：提示“该命令不在允许列表中”，并给出替代方案。
+- 输出为空：说明“未命中”，并建议放宽条件（目录、后缀、关键词）。
+- 结果不确定：必须标注不确定点，不得给出确定性结论。
+
+## 7. 技能边界声明
+本技能仅负责“文件系统信息查询与只读分析”，不主动写文件、不执行高风险操作。
+若用户要求写入或修改文件，需切换到对应写入类技能或先征得明确授权。
+
+## 8. Windows 适配版（重点）
+
+### 8.1 环境事实
+当前项目在 Windows 上运行时，`shell` 工具是通过子进程直接执行命令（`exec.CommandContext`），不是通过 PowerShell 别名层执行。
+因此：
+- 在 PowerShell 里可用的别名（如 `ls`、`cat`）不一定能直接被 `shell` 工具调用。
+- 技能在 Windows 上应优先采用“保守策略”：先尝试白名单命令，失败后回退到 `file` 工具读取。
+
+### 8.2 Windows 下推荐执行优先级
+1. **路径确认优先**：先确认用户给的是相对路径还是绝对路径。
+2. **目录探测优先**：先做小范围探测，避免全盘递归。
+3. **读取优先 `file` 工具**：当目标是读取具体文件内容时，优先使用 `file read <path>`。
+4. **命令失败即回退**：若 `shell` 命令在 Windows 不可执行，立即说明并切换 `file` 路径方案。
+
+### 8.3 Windows 典型任务建议
+#### A. 查询目录内容
+- 首选：使用允许命令进行轻量目录探测。
+- 若失败：提示“当前命令在 Windows 子进程环境不可用”，改用 `file list <path>` 列出目录；必要时请用户指定更具体的文件路径，再用 `file read <path>` 读取。
+
+#### B. 检索特定后缀文件
+- 首选：在有限目录层级内检索，避免大范围扫描。
+- 若失败：让用户给出更精确子目录，然后分段检索。
+
+#### C. 读取文件内容
+- 直接使用 `file read <path>`，这是 Windows 下最稳定路径。
+
+### 8.4 Windows 输出补充规范
+在回复中增加一行环境说明：
+- “当前为 Windows 运行环境，部分 shell 命令可能受可执行文件与白名单限制影响。”
+
+### 8.5 Windows 失败回退模板
+- 命令不可执行：
+	- 说明：命令在当前 Windows 子进程中不可用或不在白名单。
+	- 回退：请提供更具体文件路径，我将改用 `file` 工具读取并汇总结果。
+- 路径分隔符问题：
+	- 说明：路径格式可能不兼容。
+	- 回退：建议统一使用项目相对路径（如 `./workspace/...`）。
 
-常见动作：
-- 查看目录内容：`ls -la <dir>`
-- 递归列出文件：`find <dir> -maxdepth 3 -type f`
-- 查询特定后缀：`find <dir> -name "*.md"`
 
-触发信号：
-- 用户提问“某目录有什么文件”
-- 用户提问“帮我查一下 data 目录内容”
-- 用户提问“列出/检索/查找 文件”
\ No newline at end of file
diff --git a/skills/skill_builder/skill.md b/skills/skill_builder/skill.md
new file mode 100644
index 0000000..439422c
--- /dev/null
+++ b/skills/skill_builder/skill.md
@@ -0,0 +1,52 @@
+---
+name: 创建skill
+description: 当用户请求新增能力或系统发现能力缺口时，自动创建并完善 skill，必要时生成新 tool 代码并完成测试。
+---
+
+# Skill: 创建skill
+
+## 1. 触发条件
+当出现以下任一情况时触发：
+- 用户明确要求“创建/新增一个 skill”。
+- 现有技能无法覆盖用户目标（无 skill 命中或执行失败）。
+- 高频 capability_gap 指向同一类能力缺口。
+
+## 2. 目标
+1. 生成可执行的 `skills/<skill_name>/skill.md`。
+2. 若需要新工具，生成 `internal/tools/<tool_name>/` 下 Go 代码。
+3. 生成或补充测试代码并执行 `go test ./...`。
+4. 输出结果中说明新增内容、测试结果与后续建议。
+
+## 3. 可用工具
+- `file`：创建目录与文件、写入 skill/tool/test 内容。
+- `shell`：执行测试、检索代码位置、检查文件结构。
+
+## 4. 执行流程
+1. **澄清能力边界**：提炼该 skill 要解决的问题与触发信号。
+2. **命名与路径规划**：
+   - 技能路径：`skills/<skill_name>/skill.md`
+   - 工具路径（如需）：`internal/tools/<tool_name>/...`
+3. **创建 skill 文件**：写入完整字段（用途、触发、工具、ReAct 指南、失败回退、输出规范）。
+4. **判断是否需要新 tool**：
+   - 若现有 `shell/file` 足够，无需新增 tool，跳过第 5 步。
+   - 若不够，进入工具生成。
+5. **生成 tool 代码（如需）**：
+   - 实现 `Name/Description/Call`。
+   - 保持白名单与安全边界。
+   - 在主注册逻辑或 toolhost 注册逻辑中接入。
+6. **生成测试并执行**：
+   - 补充 `*_test.go`。
+   - 执行 `go test ./...`。
+7. **结果汇报**：给出新增文件清单、测试结果、风险说明。
+
+## 5. 质量约束
+- 不覆盖已有稳定 skill，优先新增目录。
+- 避免引入高风险命令；遵循 `ALLOWED_COMMANDS` 与 `ALLOWED_DIRS`。
+- 生成代码必须尽量小步、可测试、可回滚。
+- 无法可靠完成时，明确说明缺失信息并给出最短下一步。
+
+## 6. 输出模板
+- 新增技能：`skills/<skill_name>/skill.md`
+- 新增工具（可选）：`internal/tools/<tool_name>/...`
+- 测试结果：`go test ./...` 的通过/失败摘要
+- 后续动作：是否需要热加载、是否需要补充环境变量