// Source: LaodingBot/tools/websearch/websearch.go

package websearch
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"laodingbot/internal/logger"
)
// Config holds the settings needed to construct the web search tool.
type Config struct {
Engine string // Search engine identifier: "duckduckgo", "brave", or "tavily".
APIKey string // API key for the search engine (required for Brave and Tavily).
}
// Tool is a web search tool that queries the configured search engine and
// returns the results as formatted text.
type Tool struct {
// engine identifies the search engine backend in use.
engine string
// apiKey is the credential sent to engines that require authentication.
apiKey string
// httpClient is the client used to send HTTP requests.
httpClient *http.Client
// maxOutputChars caps the length of the returned result text.
maxOutputChars int
// log records search requests and their outcomes; it may be nil.
log *logger.Logger
}
// New builds a web search Tool from cfg.
//
// cfg supplies the engine name and API key; both are trimmed of surrounding
// whitespace. The engine name is also lower-cased so that values such as
// "Brave" or "TAVILY" select the intended backend instead of silently falling
// through to the DuckDuckGo default. An empty engine selects "duckduckgo".
//
// maxOutputChars limits the length of the text returned by Call; a
// non-positive value is replaced with 4000.
//
// log receives an initialization message and later search logs; it may be
// nil, in which case nothing is logged.
func New(cfg Config, maxOutputChars int, log *logger.Logger) *Tool {
	engine := strings.ToLower(strings.TrimSpace(cfg.Engine))
	if engine == "" {
		engine = "duckduckgo"
	}
	if maxOutputChars <= 0 {
		maxOutputChars = 4000
	}
	if log != nil {
		log.Infof("websearch tool initialized engine=%s max_output_chars=%d", engine, maxOutputChars)
	}
	return &Tool{
		engine:         engine,
		apiKey:         strings.TrimSpace(cfg.APIKey),
		httpClient:     &http.Client{Timeout: 15 * time.Second},
		maxOutputChars: maxOutputChars,
		log:            log,
	}
}
// Name reports the identifier under which this tool is exposed to the model.
func (t *Tool) Name() string {
	const toolName = "web_search"
	return toolName
}
// Description summarizes what the tool does, what input it expects, and what
// it returns.
func (t *Tool) Description() string {
	const description = "Search the web. Input: search query string. Returns formatted search results."
	return description
}
// Call runs a search for input using the configured engine.
//
// ctx is passed to the outgoing HTTP request so it can be cancelled or timed
// out. input is the search query; surrounding whitespace is trimmed and an
// empty query is rejected with an error. The "brave" and "tavily" engines are
// dispatched to their own backends; any other engine value uses DuckDuckGo.
//
// On success the formatted result text is returned, limited to at most
// maxOutputChars bytes. The cut is moved back to the start of a UTF-8
// character so that a multi-byte character is never split in half.
func (t *Tool) Call(ctx context.Context, input string) (string, error) {
	query := strings.TrimSpace(input)
	if query == "" {
		return "", fmt.Errorf("empty search query")
	}
	if t.log != nil {
		t.log.Infof("websearch query=%q engine=%s", query, t.engine)
	}

	var result string
	var err error
	switch t.engine {
	case "brave":
		result, err = t.searchBrave(ctx, query)
	case "tavily":
		result, err = t.searchTavily(ctx, query)
	default:
		result, err = t.searchDuckDuckGo(ctx, query)
	}
	if err != nil {
		if t.log != nil {
			t.log.Errorf("websearch failed query=%q engine=%s err=%v", query, t.engine, err)
		}
		return "", err
	}

	if len(result) > t.maxOutputChars {
		// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back
		// until the cut lands on the first byte of a character.
		cut := t.maxOutputChars
		for cut > 0 && result[cut]&0xC0 == 0x80 {
			cut--
		}
		result = result[:cut]
	}
	if t.log != nil {
		t.log.Infof("websearch success query=%q engine=%s result_len=%d", query, t.engine, len(result))
	}
	return result, nil
}
// searchDuckDuckGo queries the DuckDuckGo Instant Answer API, which needs no
// API key, and returns the formatted result text.
//
// The request is bound to ctx. A response outside the 2xx range is reported
// as an error that includes the status code and up to 1 KiB of the body,
// rather than being handed to the JSON decoder. At most 256 KiB of a
// successful response body is read.
func (t *Tool) searchDuckDuckGo(ctx context.Context, query string) (string, error) {
	apiURL := "https://api.duckduckgo.com/?q=" + url.QueryEscape(query) + "&format=json&no_html=1&skip_disambig=1"
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
	if err != nil {
		return "", fmt.Errorf("create request failed: %w", err)
	}
	req.Header.Set("User-Agent", "LaodingBot/1.0")

	resp, err := t.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("http request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		// Best effort: the snippet only enriches the error message, so a
		// failure while reading it is deliberately ignored.
		bodySnippet, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
		return "", fmt.Errorf("duckduckgo search returned status %d: %s", resp.StatusCode, string(bodySnippet))
	}

	body, err := io.ReadAll(io.LimitReader(resp.Body, 256*1024))
	if err != nil {
		return "", fmt.Errorf("read response body failed: %w", err)
	}
	if t.log != nil {
		t.log.Debugf("duckduckgo raw response: %s", string(body))
	}

	var ddg duckDuckGoResponse
	if err := json.Unmarshal(body, &ddg); err != nil {
		return "", fmt.Errorf("parse duckduckgo response failed: %w", err)
	}

	result := t.formatDuckDuckGoResult(query, ddg)
	if t.log != nil {
		t.log.Infof("duckduckgo search finished, content_found=%v", (ddg.Answer != "" || ddg.AbstractText != "" || len(ddg.RelatedTopics) > 0))
	}
	return result, nil
}
// duckDuckGoResponse maps the fields of the DuckDuckGo instant-answer JSON
// response that this package decodes.
type duckDuckGoResponse struct {
Abstract string `json:"Abstract"`
AbstractText string `json:"AbstractText"`
AbstractSource string `json:"AbstractSource"`
AbstractURL string `json:"AbstractURL"`
Answer string `json:"Answer"`
AnswerType string `json:"AnswerType"`
Heading string `json:"Heading"`
RelatedTopics []ddgRelatedItem `json:"RelatedTopics"`
}
// ddgRelatedItem is one related entry/topic in a DuckDuckGo response: its
// text and the URL associated with it.
type ddgRelatedItem struct {
Text string `json:"Text"`
FirstURL string `json:"FirstURL"`
}
// formatDuckDuckGoResult renders a DuckDuckGo response as plain text.
//
// The output starts with the query and engine name, followed by the instant
// answer, the abstract (with its source and URL when present), and up to 8
// related topics. Topics with empty text are skipped, and the "Related:"
// header is written only when at least one topic is actually listed. Topic
// text longer than 300 bytes is cut on a UTF-8 character boundary so that a
// multi-byte character is never split. If nothing was rendered, a fallback
// message is written instead. Surrounding whitespace is trimmed from the
// result.
func (t *Tool) formatDuckDuckGoResult(query string, ddg duckDuckGoResponse) string {
	const (
		maxTopics     = 8
		maxTopicBytes = 300
	)

	var b strings.Builder
	b.WriteString("Search: " + query + "\n")
	b.WriteString("Engine: DuckDuckGo\n\n")
	hasContent := false

	if ddg.Answer != "" {
		b.WriteString("Answer: " + ddg.Answer + "\n\n")
		hasContent = true
	}

	if ddg.AbstractText != "" {
		b.WriteString("Summary: " + ddg.AbstractText + "\n")
		if ddg.AbstractSource != "" {
			b.WriteString("Source: " + ddg.AbstractSource + "\n")
		}
		if ddg.AbstractURL != "" {
			b.WriteString("URL: " + ddg.AbstractURL + "\n")
		}
		b.WriteString("\n")
		hasContent = true
	}

	shown := 0
	for _, topic := range ddg.RelatedTopics {
		if topic.Text == "" {
			continue
		}
		if shown == 0 {
			// Deferred until the first printable topic so an all-empty list
			// does not leave a dangling header.
			b.WriteString("Related:\n")
		}
		text := topic.Text
		if len(text) > maxTopicBytes {
			// Step back over UTF-8 continuation bytes (10xxxxxx) so the cut
			// lands on the first byte of a character.
			cut := maxTopicBytes
			for cut > 0 && text[cut]&0xC0 == 0x80 {
				cut--
			}
			text = text[:cut]
		}
		b.WriteString("- " + text)
		if topic.FirstURL != "" {
			b.WriteString(" (" + topic.FirstURL + ")")
		}
		b.WriteString("\n")
		shown++
		if shown >= maxTopics {
			break
		}
	}
	if shown > 0 {
		hasContent = true
	}

	if !hasContent {
		b.WriteString("No instant answer available for this query. Try a more specific search or use a different search engine.\n")
	}
	return strings.TrimSpace(b.String())
}
// searchBrave queries the Brave Search API for up to 8 web results and
// returns them as formatted text. It requires an API key, which is sent in
// the X-Subscription-Token header; an empty key is rejected before any
// request is made.
//
// The request is bound to ctx. A non-200 response is reported as an error
// that includes the status code and up to 1 KiB of the body. At most 512 KiB
// of a successful response body is read.
func (t *Tool) searchBrave(ctx context.Context, query string) (string, error) {
	if t.apiKey == "" {
		return "", fmt.Errorf("WEB_SEARCH_API_KEY is required for Brave Search engine")
	}

	apiURL := "https://api.search.brave.com/res/v1/web/search?q=" + url.QueryEscape(query) + "&count=8"
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
	if err != nil {
		return "", fmt.Errorf("create request failed: %w", err)
	}
	req.Header.Set("Accept", "application/json")
	// Accept-Encoding is intentionally NOT set here. net/http's Transport
	// requests gzip on its own and transparently decompresses the reply, but
	// when the caller sets the header explicitly the body is delivered still
	// compressed, which would make the JSON decoding below fail.
	req.Header.Set("X-Subscription-Token", t.apiKey)

	resp, err := t.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("http request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the snippet only enriches the error message, so a
		// failure while reading it is deliberately ignored.
		bodySnippet, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
		return "", fmt.Errorf("brave search returned status %d: %s", resp.StatusCode, string(bodySnippet))
	}

	body, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
	if err != nil {
		return "", fmt.Errorf("read response body failed: %w", err)
	}
	if t.log != nil {
		t.log.Debugf("brave search raw response: %s", string(body))
	}

	var braveResp braveSearchResponse
	if err := json.Unmarshal(body, &braveResp); err != nil {
		return "", fmt.Errorf("parse brave response failed: %w", err)
	}
	if t.log != nil {
		t.log.Infof("brave search finished, results_count=%d", len(braveResp.Web.Results))
	}
	return t.formatBraveResult(query, braveResp), nil
}
// braveSearchResponse receives the web-results portion of a Brave Search
// response.
type braveSearchResponse struct {
Web struct {
Results []braveWebResult `json:"results"`
} `json:"web"`
}
// braveWebResult is a single web search result: its title, URL, and
// description.
type braveWebResult struct {
Title string `json:"title"`
URL string `json:"url"`
Description string `json:"description"`
}
// formatBraveResult renders a Brave Search response as plain text.
//
// The output starts with the query and engine name, followed by at most 8
// numbered results (title, URL, description). Descriptions longer than 300
// bytes are cut on a UTF-8 character boundary so that a multi-byte character
// is never split. When there are no results, "No results found." is written
// instead. Surrounding whitespace is trimmed from the result.
func (t *Tool) formatBraveResult(query string, resp braveSearchResponse) string {
	const (
		maxResults   = 8
		maxDescBytes = 300
	)

	var b strings.Builder
	b.WriteString("Search: " + query + "\n")
	b.WriteString("Engine: Brave\n\n")

	if len(resp.Web.Results) == 0 {
		b.WriteString("No results found.\n")
		return strings.TrimSpace(b.String())
	}

	for i, r := range resp.Web.Results {
		if i >= maxResults {
			break
		}
		desc := r.Description
		if len(desc) > maxDescBytes {
			// Step back over UTF-8 continuation bytes (10xxxxxx) so the cut
			// lands on the first byte of a character.
			cut := maxDescBytes
			for cut > 0 && desc[cut]&0xC0 == 0x80 {
				cut--
			}
			desc = desc[:cut]
		}
		fmt.Fprintf(&b, "%d. %s\n %s\n %s\n\n", i+1, r.Title, r.URL, desc)
	}
	return strings.TrimSpace(b.String())
}
// searchTavily queries the Tavily Search API and returns the formatted
// result text. It requires an API key, which is sent in the JSON request
// body; an empty key is rejected before any request is made.
//
// The request is a POST bound to ctx asking for a basic-depth search with an
// answer and up to 5 results. A non-200 response is reported as an error
// that includes the status code and up to 1 KiB of the body. At most 512 KiB
// of a successful response body is read.
func (t *Tool) searchTavily(ctx context.Context, query string) (string, error) {
	if t.apiKey == "" {
		return "", fmt.Errorf("WEB_SEARCH_API_KEY is required for Tavily engine")
	}

	const endpoint = "https://api.tavily.com/search"

	encoded, err := json.Marshal(map[string]interface{}{
		"api_key":             t.apiKey,
		"query":               query,
		"search_depth":        "basic",
		"include_answer":      true,
		"include_images":      false,
		"include_raw_content": false,
		"max_results":         5,
	})
	if err != nil {
		return "", fmt.Errorf("marshal tavily payload failed: %w", err)
	}

	request, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, strings.NewReader(string(encoded)))
	if err != nil {
		return "", fmt.Errorf("create request failed: %w", err)
	}
	request.Header.Set("Content-Type", "application/json")

	response, err := t.httpClient.Do(request)
	if err != nil {
		return "", fmt.Errorf("http request failed: %w", err)
	}
	defer response.Body.Close()

	if status := response.StatusCode; status != http.StatusOK {
		// Best effort: the snippet only enriches the error message.
		snippet, _ := io.ReadAll(io.LimitReader(response.Body, 1024))
		return "", fmt.Errorf("tavily search returned status %d: %s", status, string(snippet))
	}

	raw, err := io.ReadAll(io.LimitReader(response.Body, 512*1024))
	if err != nil {
		return "", fmt.Errorf("read response body failed: %w", err)
	}
	if t.log != nil {
		t.log.Debugf("tavily search raw response: %s", string(raw))
	}

	var parsed tavilyResponse
	if err := json.Unmarshal(raw, &parsed); err != nil {
		return "", fmt.Errorf("parse tavily response failed: %w", err)
	}
	return t.formatTavilyResult(query, parsed), nil
}
// tavilyResponse maps the fields of a Tavily search response that this
// package decodes: the optional answer text and the list of results.
type tavilyResponse struct {
Answer string `json:"answer"`
Results []tavilyResult `json:"results"`
}
// tavilyResult is a single Tavily search result. Title, URL, and Content are
// rendered by formatTavilyResult; Score is decoded but not used there.
type tavilyResult struct {
Title string `json:"title"`
URL string `json:"url"`
Content string `json:"content"`
Score float64 `json:"score"`
}
// formatTavilyResult renders a Tavily response as plain text.
//
// The output starts with the query and engine name, then the answer when one
// is present, then every result as a numbered entry (title, URL, content).
// When there is neither an answer nor any result, "No results found." is
// written instead. Surrounding whitespace is trimmed from the result.
func (t *Tool) formatTavilyResult(query string, resp tavilyResponse) string {
	var out strings.Builder
	fmt.Fprintf(&out, "Search: %s\nEngine: Tavily\n\n", query)

	switch {
	case resp.Answer != "":
		fmt.Fprintf(&out, "Answer: %s\n\n", resp.Answer)
	case len(resp.Results) == 0:
		out.WriteString("No results found.\n")
	}

	for idx := range resp.Results {
		item := resp.Results[idx]
		fmt.Fprintf(&out, "%d. %s\n %s\n %s\n\n", idx+1, item.Title, item.URL, item.Content)
	}
	return strings.TrimSpace(out.String())
}