将flask改成fastapi
This commit is contained in:
58
agent/component/__init__.py
Normal file
58
agent/component/__init__.py
Normal file
@@ -0,0 +1,58 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import os
|
||||
import importlib
|
||||
import inspect
|
||||
from types import ModuleType
|
||||
from typing import Dict, Type
|
||||
|
||||
# Directory containing this package's component modules.
_package_path = os.path.split(__file__)[0]
# Registry of the public classes found in the sibling modules, keyed by class name.
__all_classes: Dict[str, Type] = dict()
|
||||
|
||||
def _import_submodules() -> None:
    """Import every component module of this package and register its classes.

    Directory entries whose name starts with ``__`` or ``base``, and entries
    that are not ``.py`` files, are skipped.  A module that raises
    ``ImportError`` is reported on stdout and does not stop the scan.
    """
    for entry in os.listdir(_package_path):  # noqa: F821
        is_candidate = (
            entry.endswith(".py")
            and not entry.startswith("__")
            and not entry.startswith("base")
        )
        if not is_candidate:
            continue

        module_name = entry[:-3]  # strip the ".py" suffix
        try:
            loaded = importlib.import_module(f".{module_name}", package=__name__)
            _extract_classes_from_module(loaded)  # noqa: F821
        except ImportError as e:
            print(f"Warning: Failed to import module {module_name}: {str(e)}")
|
||||
|
||||
def _extract_classes_from_module(module: ModuleType) -> None:
    """Register the public classes that *module* itself defines.

    A class qualifies when its ``__module__`` equals the module's name (so
    classes merely imported into the module are ignored) and its name does
    not start with an underscore.  Each qualifying class is stored in
    ``__all_classes`` and bound as a package-level global.
    """
    package_namespace = globals()
    for cls_name, candidate in inspect.getmembers(module, inspect.isclass):
        if candidate.__module__ != module.__name__ or cls_name.startswith("_"):
            continue
        __all_classes[cls_name] = candidate
        package_namespace[cls_name] = candidate
|
||||
|
||||
# Fill the registry and the package namespace at import time.
_import_submodules()

# Export every discovered class, plus the registry itself.
__all__ = [*__all_classes, "__all_classes"]

# The discovery helpers are single-use; remove them from the package namespace.
del _package_path
del _import_submodules
del _extract_classes_from_module
|
||||
|
||||
|
||||
def component_class(class_name):
    """Look up a component class by name.

    The packages ``agent.component``, ``agent.tools`` and ``rag.flow`` are
    tried in that order; the first one exposing *class_name* wins.

    Args:
        class_name: Name of the class to resolve.

    Returns:
        The attribute named *class_name* from the first package that has it.

    Raises:
        AssertionError: If none of the packages provides *class_name*.
    """
    for mdl in ("agent.component", "agent.tools", "rag.flow"):
        try:
            return getattr(importlib.import_module(mdl), class_name)
        except Exception:
            # Best effort: a package that is missing, fails to import, or
            # lacks the attribute simply defers to the next candidate.
            continue
    # Raised explicitly instead of `assert False`: assert statements are
    # removed under `python -O`, which would make this silently return None.
    raise AssertionError(f"Can't import {class_name}")
|
||||
352
agent/component/agent_with_tools.py
Normal file
352
agent/component/agent_with_tools.py
Normal file
@@ -0,0 +1,352 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from copy import deepcopy
|
||||
from functools import partial
|
||||
from typing import Any
|
||||
|
||||
import json_repair
|
||||
from timeit import default_timer as timer
|
||||
from agent.tools.base import LLMToolPluginCallSession, ToolParamBase, ToolBase, ToolMeta
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api.db.services.tenant_llm_service import TenantLLMService
|
||||
from api.db.services.mcp_server_service import MCPServerService
|
||||
from api.utils.api_utils import timeout
|
||||
from rag.prompts.generator import next_step, COMPLETE_TASK, analyze_task, \
|
||||
citation_prompt, reflect, rank_memories, kb_prompt, citation_plus, full_question, message_fit_in
|
||||
from rag.utils.mcp_tool_call_conn import MCPToolCallSession, mcp_tool_metadata_to_openai_tool
|
||||
from agent.component.llm import LLMParam, LLM
|
||||
|
||||
|
||||
class AgentParam(LLMParam, ToolParamBase):
    """
    Parameters of the Agent component.
    """

    def __init__(self):
        # Descriptions of the arguments accepted when this agent is called as a tool.
        user_prompt_spec = {
            "type": "string",
            "description": "This is the order you need to send to the agent.",
            "default": "",
            "required": True
        }
        reasoning_spec = {
            "type": "string",
            "description": (
                "Supervisor's reasoning for choosing the this agent. "
                "Explain why this agent is being invoked and what is expected of it."
            ),
            "required": True
        }
        context_spec = {
            "type": "string",
            "description": (
                "All relevant background information, prior facts, decisions, "
                "and state needed by the agent to solve the current query. "
                "Should be as detailed and self-contained as possible."
            ),
            "required": True
        }

        # The metadata is assigned before the parent initialisers run, as in
        # the original ordering.
        self.meta: ToolMeta = {
            "name": "agent",
            "description": "This is an agent for a specific task.",
            "parameters": {
                "user_prompt": user_prompt_spec,
                "reasoning": reasoning_spec,
                "context": context_spec,
            }
        }
        super().__init__()

        # Agent-specific defaults, applied on top of the inherited ones.
        self.function_name = "agent"
        self.tools = []        # component tool definitions
        self.mcp = []          # MCP server tool definitions
        self.max_rounds = 5    # upper bound used by the tool-calling loop
        self.description = ""
|
||||
|
||||
|
||||
class Agent(LLM, ToolBase):
    """LLM component that plans and executes tool calls before answering.

    Tools come from two places: component definitions in ``param.tools`` and
    MCP server entries in ``param.mcp``.  When no tool is configured the
    component falls back to the plain ``LLM`` invocation.
    """

    component_name = "Agent"

    def __init__(self, canvas, id, param: LLMParam):
        """Build the tool registry, the chat model and the tool-call session.

        Args:
            canvas: Canvas that owns this component.
            id: Component id; it is also bound into the tool-use callback.
            param: Agent parameters; ``tools``, ``mcp``, ``llm_id``,
                ``max_retries``, ``delay_after_error`` and ``max_rounds`` are
                read here.
        """
        LLM.__init__(self, canvas, id, param)

        # Tool name -> object able to execute that tool.
        self.tools = {}
        for cpn in self._param.tools:
            cpn = self._load_tool_obj(cpn)
            self.tools[cpn.get_meta()["function"]["name"]] = cpn

        self.chat_mdl = LLMBundle(self._canvas.get_tenant_id(), TenantLLMService.llm_id2llm_type(self._param.llm_id), self._param.llm_id,
                                  max_retries=self._param.max_retries,
                                  retry_interval=self._param.delay_after_error,
                                  max_rounds=self._param.max_rounds,
                                  verbose_tool_use=True
                                  )
        # Metadata of the component tools; MCP tool metadata is appended below.
        self.tool_meta = [v.get_meta() for v in self.tools.values()]

        for mcp in self._param.mcp:
            _, mcp_server = MCPServerService.get_by_id(mcp["mcp_id"])
            # One session per MCP server, shared by all tools of that server.
            tool_call_session = MCPToolCallSession(mcp_server, mcp_server.variables)
            for tnm, meta in mcp["tools"].items():
                self.tool_meta.append(mcp_tool_metadata_to_openai_tool(meta))
                self.tools[tnm] = tool_call_session

        self.callback = partial(self._canvas.tool_use_callback, id)
        self.toolcall_session = LLMToolPluginCallSession(self.tools, self.callback)

    def _load_tool_obj(self, cpn: dict) -> object:
        """Instantiate the tool component described by *cpn*.

        Args:
            cpn: Tool definition with ``component_name`` and ``params`` keys
                and an optional ``name``.

        Returns:
            The component instance, whose id is ``"<this id>--><name>"`` with
            spaces in the name replaced by underscores.

        Raises:
            Exception: Whatever ``param.check()`` raises; the message is also
                recorded in this component's ``_ERROR`` output first.
        """
        # Imported here rather than at module level, as in the original.
        from agent.component import component_class

        param = component_class(cpn["component_name"] + "Param")()
        param.update(cpn["params"])
        try:
            param.check()
        except Exception as e:
            self.set_output("_ERROR", cpn["component_name"] + f" configuration error: {e}")
            raise
        cpn_id = f"{self._id}-->" + cpn.get("name", "").replace(" ", "_")
        return component_class(cpn["component_name"])(self._canvas, cpn_id, param)

    def get_meta(self) -> dict[str, Any]:
        """Return the tool metadata describing this agent.

        The function name is taken from the last ``-->`` separated segment of
        the component id.  A non-empty ``user_prompt`` parameter replaces the
        ``user_prompt`` property of the metadata.
        """
        self._param.function_name = self._id.split("-->")[-1]
        m = super().get_meta()
        if hasattr(self._param, "user_prompt") and self._param.user_prompt:
            m["function"]["parameters"]["properties"]["user_prompt"] = self._param.user_prompt
        return m

    def get_input_form(self) -> dict[str, dict]:
        """Return one ``{"type": "line", "name": ...}`` entry per input element."""
        res = {}
        for k, v in self.get_input_elements().items():
            res[k] = {
                "type": "line",
                "name": v["name"]
            }
        # NOTE(review): `_load_tool_obj` indexes the entries of `_param.tools`
        # as dicts, so this isinstance check presumably never matches and the
        # loop adds nothing; iterating `self.tools.values()` may have been
        # intended.  Behaviour is left unchanged here.
        for cpn in self._param.tools:
            if not isinstance(cpn, LLM):
                continue
            res.update(cpn.get_input_form())
        return res

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 20*60)))
    def _invoke(self, **kwargs):
        """Run the agent once.

        Keyword Args:
            user_prompt: When truthy, replaces the configured prompts with a
                single user message.
            reasoning: Optional text prepended under a ``REASONING:`` header.
            context: Optional text prepended under a ``CONTEXT:`` header.

        Returns:
            The result of ``LLM._invoke`` when no tool is configured, the
            complete answer text on the non-streaming path, or ``None`` when a
            streaming generator was stored in ``content`` or an error was
            recorded.
        """
        if kwargs.get("user_prompt"):
            usr_pmt = ""
            if kwargs.get("reasoning"):
                usr_pmt += "\nREASONING:\n{}\n".format(kwargs["reasoning"])
            if kwargs.get("context"):
                usr_pmt += "\nCONTEXT:\n{}\n".format(kwargs["context"])
            if usr_pmt:
                usr_pmt += "\nQUERY:\n{}\n".format(str(kwargs["user_prompt"]))
            else:
                usr_pmt = str(kwargs["user_prompt"])
            self._param.prompts = [{"role": "user", "content": usr_pmt}]

        if not self.tools:
            return LLM._invoke(self, **kwargs)

        prompt, msg, user_defined_prompt = self._prepare_prompt_variables()

        component = self._canvas.get_component(self._id)
        downstreams = component["downstream"] if component else []
        ex = self.exception_handler()
        feeds_message = any([self._canvas.get_component_obj(cid).component_name.lower() == "message" for cid in downstreams])
        # Stream only when a Message component consumes the output, no
        # structured output is requested and no exception "goto" is configured.
        if feeds_message and not self._param.output_structure and not (ex and ex["goto"]):
            self.set_output("content", partial(self.stream_output_with_tools, prompt, msg, user_defined_prompt))
            return

        _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
        use_tools = []
        ans = ""
        for delta_ans, tk in self._react_with_tools_streamly(prompt, msg, use_tools, user_defined_prompt):
            ans += delta_ans

        if ans.find("**ERROR**") >= 0:
            logging.error(f"Agent._chat got error. response: {ans}")
            if self.get_exception_default_value():
                self.set_output("content", self.get_exception_default_value())
            else:
                self.set_output("_ERROR", ans)
            return

        self.set_output("content", ans)
        if use_tools:
            self.set_output("use_tools", use_tools)
        return ans

    def stream_output_with_tools(self, prompt, msg, user_defined_prompt={}):
        """Stream the agent's answer chunk by chunk.

        Args:
            prompt: System prompt.
            msg: Conversation messages (without the system prompt).
            user_defined_prompt: Prompt overrides forwarded to the prompt
                helpers; it is only passed along here.

        Yields:
            Answer chunks.  If a chunk contains ``**ERROR**`` the stream stops:
            the configured exception default value is stored in ``content``
            and yielded when there is one, otherwise the chunk is stored in
            ``_ERROR``.
        """
        _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
        answer_without_toolcall = ""
        use_tools = []
        for delta_ans, _ in self._react_with_tools_streamly(prompt, msg, use_tools, user_defined_prompt):
            if delta_ans.find("**ERROR**") >= 0:
                if self.get_exception_default_value():
                    self.set_output("content", self.get_exception_default_value())
                    yield self.get_exception_default_value()
                else:
                    self.set_output("_ERROR", delta_ans)
                # Stop here: continuing would emit the raw error text after
                # the fallback and overwrite `content` with it below.
                return
            answer_without_toolcall += delta_ans
            yield delta_ans

        self.set_output("content", answer_without_toolcall)
        if use_tools:
            self.set_output("use_tools", use_tools)

    def _gen_citations(self, text):
        """Stream a rewrite of *text* produced with the citation prompt.

        The canvas references (``chunks`` and ``doc_aggs``) are formatted with
        ``kb_prompt`` and supplied through the system message.
        """
        retrievals = self._canvas.get_reference()
        retrievals = {"chunks": list(retrievals["chunks"].values()), "doc_aggs": list(retrievals["doc_aggs"].values())}
        formated_refer = kb_prompt(retrievals, self.chat_mdl.max_length, True)
        messages = [
            {"role": "system", "content": citation_plus("\n\n".join(formated_refer))},
            {"role": "user", "content": text},
        ]
        for delta_ans in self._generate_streamly(messages):
            yield delta_ans

    def _react_with_tools_streamly(self, prompt, history: list[dict], use_tools, user_defined_prompt={}):
        """Drive the plan / call-tools / reflect loop and stream the final answer.

        Args:
            prompt: System prompt handed to ``analyze_task``.
            history: Conversation messages; a deep copy is extended internally.
            use_tools: List that receives one record (``name``, ``arguments``,
                ``results``) per executed tool call.
            user_defined_prompt: Prompt overrides forwarded to the prompt
                helpers; it is only passed along here.

        Yields:
            ``(text_chunk, token_count)`` tuples.
        """
        token_count = 0
        tool_metas = self.tool_meta
        hist = deepcopy(history)
        last_calling = ""
        if len(hist) > 3:
            # Longer conversations are first condensed into a single request.
            st = timer()
            user_request = full_question(messages=history, chat_mdl=self.chat_mdl)
            self.callback("Multi-turn conversation optimization", {}, user_request, elapsed_time=timer()-st)
        else:
            user_request = history[-1]["content"]

        def use_tool(name, args):
            """Execute one tool call and record it in ``use_tools``."""
            nonlocal last_calling
            logging.info(f"{last_calling=} == {name=}")
            last_calling = name
            tool_response = self.toolcall_session.tool_call(name, args)
            use_tools.append({
                "name": name,
                "arguments": args,
                "results": tool_response
            })
            return name, tool_response

        def complete():
            """Stream the final answer, adding citations when they are required."""
            need2cite = self._param.cite and self._canvas.get_reference()["chunks"] and self._id.find("-->") < 0
            cited = False
            if hist[0]["role"] == "system" and need2cite:
                if len(hist) < 7:
                    # Short history: have the model cite inline in one pass.
                    hist[0]["content"] += citation_prompt()
                    cited = True
            yield "", token_count

            _hist = hist
            if len(hist) > 12:
                # Keep the first two messages and the ten most recent ones.
                _hist = [hist[0], hist[1], *hist[-10:]]
            entire_txt = ""
            stream_directly = not need2cite or cited
            for delta_ans in self._generate_streamly(_hist):
                if stream_directly:
                    yield delta_ans, 0
                entire_txt += delta_ans
            if stream_directly:
                return

            # Second pass: the buffered draft is rewritten with citations and
            # only that rewrite is streamed.
            st = timer()
            txt = ""
            for delta_ans in self._gen_citations(entire_txt):
                yield delta_ans, 0
                txt += delta_ans

            self.callback("gen_citations", {}, txt, elapsed_time=timer()-st)

        def append_user_content(hist, content):
            """Append *content* to the trailing user message, or add a new one."""
            if hist[-1]["role"] == "user":
                hist[-1]["content"] += content
            else:
                hist.append({"role": "user", "content": content})

        st = timer()
        task_desc = analyze_task(self.chat_mdl, prompt, user_request, tool_metas, user_defined_prompt)
        self.callback("analyze_task", {}, task_desc, elapsed_time=timer()-st)
        for _ in range(self._param.max_rounds + 1):
            response, tk = next_step(self.chat_mdl, hist, tool_metas, task_desc, user_defined_prompt)
            token_count += tk
            hist.append({"role": "assistant", "content": response})
            try:
                # Drop code-fence marker lines before parsing the JSON plan.
                functions = json_repair.loads(re.sub(r"```.*", "", response))
                if not isinstance(functions, list):
                    raise TypeError(f"List should be returned, but `{functions}`")
                for f in functions:
                    if not isinstance(f, dict):
                        raise TypeError(f"An object type should be returned, but `{f}`")
                with ThreadPoolExecutor(max_workers=5) as executor:
                    thr = []
                    for func in functions:
                        name = func["name"]
                        args = func["arguments"]
                        if name == COMPLETE_TASK:
                            append_user_content(hist, f"Respond with a formal answer. FORGET(DO NOT mention) about `{COMPLETE_TASK}`. The language for the response MUST be as the same as the first user request.\n")
                            for txt, tkcnt in complete():
                                yield txt, tkcnt
                            return

                        thr.append(executor.submit(use_tool, name, args))

                    st = timer()
                    reflection = reflect(self.chat_mdl, hist, [th.result() for th in thr], user_defined_prompt)
                    append_user_content(hist, reflection)
                    self.callback("reflection", {}, str(reflection), elapsed_time=timer()-st)

            except Exception as e:
                # Feed the failure back to the model so the next round can
                # correct its tool-call format.
                logging.exception(msg=f"Wrong JSON argument format in LLM ReAct response: {e}")
                feedback = f"\nTool call error, please correct the input parameter of response format and call it again.\n *** Exception ***\n{e}"
                append_user_content(hist, str(feedback))

        logging.warning(f"Exceed max rounds: {self._param.max_rounds}")
        final_instruction = f"""
{user_request}
IMPORTANT: You have reached the conversation limit. Based on ALL the information and research you have gathered so far, please provide a DIRECT and COMPREHENSIVE final answer to the original request.
Instructions:
1. SYNTHESIZE all information collected during this conversation
2. Provide a COMPLETE response using existing data - do not suggest additional research
3. Structure your response as a FINAL DELIVERABLE, not a plan
4. If information is incomplete, state what you found and provide the best analysis possible with available data
5. DO NOT mention conversation limits or suggest further steps
6. Focus on delivering VALUE with the information already gathered
Respond immediately with your final comprehensive answer.
"""
        append_user_content(hist, final_instruction)

        for txt, tkcnt in complete():
            yield txt, tkcnt

    def get_useful_memory(self, goal: str, sub_goal: str, topn=3, user_defined_prompt: dict = {}) -> str:
        """Return the *topn* canvas memories ranked most relevant by the model.

        Args:
            goal: Overall goal passed to ``rank_memories``.
            sub_goal: Current sub-goal passed to ``rank_memories``.
            topn: Number of ranked entries to keep.
            user_defined_prompt: Prompt overrides forwarded to ``rank_memories``.

        Returns:
            The selected memories rendered as ``User: ... / Agent: ...`` blocks
            separated by blank lines, or ``"Error occurred."`` when the ranking
            cannot be parsed or applied.
        """
        mems = self._canvas.get_memory()
        rank = rank_memories(self.chat_mdl, goal, sub_goal, [summ for (user, assist, summ) in mems], user_defined_prompt)
        try:
            rank = json_repair.loads(re.sub(r"```.*", "", rank))[:topn]
            mems = [mems[r] for r in rank]
            return "\n\n".join([f"User: {u}\nAgent: {a}" for u, a, _ in mems])
        except Exception as e:
            logging.exception(e)

        return "Error occurred."

    def reset(self):
        """Reset every registered tool that supports it.

        ``self.tools`` also holds MCP tool-call sessions, which are not
        components; entries without a callable ``reset`` are skipped instead
        of raising ``AttributeError``.
        """
        for cpn in self.tools.values():
            if callable(getattr(cpn, "reset", None)):
                cpn.reset()
|
||||
|
||||
564
agent/component/base.py
Normal file
564
agent/component/base.py
Normal file
@@ -0,0 +1,564 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import re
|
||||
import time
|
||||
from abc import ABC
|
||||
import builtins
|
||||
import json
|
||||
import os
|
||||
import logging
|
||||
from typing import Any, List, Union
|
||||
import pandas as pd
|
||||
import trio
|
||||
from agent import settings
|
||||
from api.utils.api_utils import timeout
|
||||
|
||||
|
||||
_FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
|
||||
_DEPRECATED_PARAMS = "_deprecated_params"
|
||||
_USER_FEEDED_PARAMS = "_user_feeded_params"
|
||||
_IS_RAW_CONF = "_is_raw_conf"
|
||||
|
||||
|
||||
class ComponentParamBase(ABC):
    """Base class for component parameter objects.

    Provides default attributes shared by all components, recursive update
    from a configuration dict, conversion to a plain dict, optional JSON-rule
    validation and a set of static ``check_*`` helpers that raise
    ``ValueError`` on invalid values.
    """

    def __init__(self):
        self.message_history_window_size = 13
        self.inputs = {}
        self.outputs = {}
        self.description = ""
        self.max_retries = 0
        self.delay_after_error = 2.0
        self.exception_method = None
        self.exception_default_value = None
        self.exception_goto = None
        self.debug_inputs = {}

    def set_name(self, name: str):
        """Store *name* as ``_name`` and return ``self`` for chaining."""
        self._name = name
        return self

    def check(self):
        """Validate the parameters; concrete parameter classes must override this.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Parameter Object should be checked.")

    @classmethod
    def _get_or_init_deprecated_params_set(cls):
        """Return the class-level set of deprecated names, creating it if absent."""
        if not hasattr(cls, _DEPRECATED_PARAMS):
            setattr(cls, _DEPRECATED_PARAMS, set())
        return getattr(cls, _DEPRECATED_PARAMS)

    def _get_or_init_feeded_deprecated_params_set(self, conf=None):
        """Return the set of deprecated parameters that were supplied.

        On first use the set is created empty, or seeded from
        ``conf[_FEEDED_DEPRECATED_PARAMS]`` when *conf* is given.
        """
        if not hasattr(self, _FEEDED_DEPRECATED_PARAMS):
            if conf is None:
                setattr(self, _FEEDED_DEPRECATED_PARAMS, set())
            else:
                setattr(
                    self,
                    _FEEDED_DEPRECATED_PARAMS,
                    set(conf[_FEEDED_DEPRECATED_PARAMS]),
                )
        return getattr(self, _FEEDED_DEPRECATED_PARAMS)

    def _get_or_init_user_feeded_params_set(self, conf=None):
        """Return the set of parameter names that were supplied.

        On first use the set is created empty, or seeded from
        ``conf[_USER_FEEDED_PARAMS]`` when *conf* is given.
        """
        if not hasattr(self, _USER_FEEDED_PARAMS):
            if conf is None:
                setattr(self, _USER_FEEDED_PARAMS, set())
            else:
                setattr(self, _USER_FEEDED_PARAMS, set(conf[_USER_FEEDED_PARAMS]))
        return getattr(self, _USER_FEEDED_PARAMS)

    def get_user_feeded(self):
        """Return the set of parameter names recorded by ``update``."""
        return self._get_or_init_user_feeded_params_set()

    def get_feeded_deprecated_params(self):
        """Return the set of supplied parameter names that are deprecated."""
        return self._get_or_init_feeded_deprecated_params_set()

    @property
    def _deprecated_params_set(self):
        # Despite the name this is a dict (name -> True), which is what the
        # `.get(...)` lookups in the `_warn_*` helpers rely on.
        return {name: True for name in self.get_feeded_deprecated_params()}

    def __str__(self):
        """Return ``as_dict()`` serialised as JSON, keeping non-ASCII characters."""
        return json.dumps(self.as_dict(), ensure_ascii=False)

    def as_dict(self):
        """Convert this object, recursively, into a plain dict.

        The bookkeeping attributes (user-fed / deprecated sets and the raw-conf
        flag) are omitted.  ``pandas.DataFrame`` attributes are converted with
        ``to_dict()``.  Dicts, and truthy values whose type name is not a
        builtin name, are converted recursively.
        """
        bookkeeping = [_FEEDED_DEPRECATED_PARAMS, _DEPRECATED_PARAMS, _USER_FEEDED_PARAMS, _IS_RAW_CONF]

        def _recursive_convert_obj_to_dict(obj):
            ret_dict = {}
            if isinstance(obj, dict):
                for k, v in obj.items():
                    if isinstance(v, dict) or (v and type(v).__name__ not in dir(builtins)):
                        ret_dict[k] = _recursive_convert_obj_to_dict(v)
                    else:
                        ret_dict[k] = v
                return ret_dict

            for attr_name in list(obj.__dict__):
                if attr_name in bookkeeping:
                    continue
                attr = getattr(obj, attr_name)
                # Handled before the truthiness test below, which a DataFrame
                # does not support.
                if isinstance(attr, pd.DataFrame):
                    ret_dict[attr_name] = attr.to_dict()
                    continue
                if isinstance(attr, dict) or (attr and type(attr).__name__ not in dir(builtins)):
                    ret_dict[attr_name] = _recursive_convert_obj_to_dict(attr)
                else:
                    ret_dict[attr_name] = attr

            return ret_dict

        return _recursive_convert_obj_to_dict(self)

    def update(self, conf, allow_redundant=False):
        """Apply the values of *conf* to this object, recursing into sub-objects.

        Args:
            conf: Mapping of attribute name to value.  Unless it carries a
                falsy ``_IS_RAW_CONF`` entry it is treated as raw user input,
                and the names it sets are recorded in the user-fed set (and in
                the fed-deprecated set when the name is deprecated).
            allow_redundant: Only consulted by the redundant-parameter check,
                which currently never fires (see the note below).

        Returns:
            ``self``.

        Raises:
            ValueError: If nesting exceeds ``settings.PARAM_MAXDEPTH``.
        """
        update_from_raw_conf = conf.get(_IS_RAW_CONF, True)
        if update_from_raw_conf:
            deprecated_params_set = self._get_or_init_deprecated_params_set()
            feeded_deprecated_params_set = (
                self._get_or_init_feeded_deprecated_params_set()
            )
            user_feeded_params_set = self._get_or_init_user_feeded_params_set()
            setattr(self, _IS_RAW_CONF, False)
        else:
            feeded_deprecated_params_set = (
                self._get_or_init_feeded_deprecated_params_set(conf)
            )
            user_feeded_params_set = self._get_or_init_user_feeded_params_set(conf)

        def _recursive_update_param(param, config, depth, prefix):
            if depth > settings.PARAM_MAXDEPTH:
                raise ValueError("Param define nesting too deep!!!, can not parse it")

            inst_variables = param.__dict__
            # NOTE(review): nothing is ever appended to this list (unknown
            # keys are accepted below), so the redundancy error is unreachable.
            redundant_attrs = []
            for config_key, config_value in config.items():
                if config_key not in inst_variables:
                    # Unknown keys are simply attached to the object.
                    setattr(param, config_key, config_value)
                    continue

                full_config_key = f"{prefix}{config_key}"

                if update_from_raw_conf:
                    user_feeded_params_set.add(full_config_key)
                    if full_config_key in deprecated_params_set:
                        feeded_deprecated_params_set.add(full_config_key)

                attr = getattr(param, config_key)
                if type(attr).__name__ in dir(builtins) or attr is None:
                    # Builtin-typed (or unset) attribute: overwrite directly.
                    setattr(param, config_key, config_value)
                else:
                    # Nested parameter object: update it in place, recursively.
                    sub_params = _recursive_update_param(
                        attr, config_value, depth + 1, prefix=f"{prefix}{config_key}."
                    )
                    setattr(param, config_key, sub_params)

            if not allow_redundant and redundant_attrs:
                raise ValueError(
                    f"cpn `{getattr(self, '_name', type(self))}` has redundant parameters: `{[redundant_attrs]}`"
                )

            return param

        return _recursive_update_param(param=self, config=conf, depth=0, prefix="")

    def extract_not_builtin(self):
        """Return a nested dict keyed by the truthy non-builtin-typed attributes."""
        def _get_not_builtin_types(obj):
            ret_dict = {}
            for variable in obj.__dict__:
                attr = getattr(obj, variable)
                if attr and type(attr).__name__ not in dir(builtins):
                    ret_dict[variable] = _get_not_builtin_types(attr)

            return ret_dict

        return _get_not_builtin_types(self)

    def validate(self):
        """Validate this object against ``param_validation/<ClassName>.json``.

        The rule file is looked up next to this module.  When it is missing,
        unreadable or not valid JSON, validation is skipped silently.

        Raises:
            ValueError: If a value violates every rule listed for it.
        """
        self.builtin_types = dir(builtins)
        self.func = {
            "ge": self._greater_equal_than,
            "le": self._less_equal_than,
            "in": self._in,
            "not_in": self._not_in,
            "range": self._range,
        }
        home_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
        param_validation_path_prefix = home_dir + "/param_validation/"

        param_name = type(self).__name__
        param_validation_path = "/".join(
            [param_validation_path_prefix, param_name + ".json"]
        )

        validation_json = None

        try:
            with open(param_validation_path, "r", encoding="utf-8") as fin:
                validation_json = json.loads(fin.read())
        except Exception:
            # Best effort: no usable rule file means nothing to validate.
            # `Exception` (not `BaseException`) so that KeyboardInterrupt and
            # SystemExit are not swallowed here.
            return

        self._validate_param(self, validation_json)

    def _validate_param(self, param_obj, validation_json):
        """Check the attributes of *param_obj* against *validation_json*.

        A value is accepted as soon as one of the operators listed for it
        (``ge``, ``le``, ``in``, ``not_in``, ``range``) is satisfied.

        Raises:
            ValueError: If no listed operator accepts the value.
        """
        default_section = type(param_obj).__name__
        var_list = param_obj.__dict__

        for variable in var_list:
            attr = getattr(param_obj, variable)

            if type(attr).__name__ in self.builtin_types or attr is None:
                # NOTE(review): membership is tested on the top level of
                # `validation_json` but the rule is read from
                # `validation_json[default_section]`; confirm the rule-file
                # layout before changing either side.
                if variable not in validation_json:
                    continue

                validation_dict = validation_json[default_section][variable]
                value = getattr(param_obj, variable)
                value_legal = False

                for op_type in validation_dict:
                    if self.func[op_type](value, validation_dict[op_type]):
                        value_legal = True
                        break

                if not value_legal:
                    raise ValueError(
                        "Please check runtime conf, {} = {} does not match user-parameter restriction".format(
                            variable, value
                        )
                    )

            elif variable in validation_json:
                self._validate_param(attr, validation_json)

    @staticmethod
    def check_string(param, descr):
        """Raise ``ValueError`` unless *param* is exactly of type ``str``."""
        if type(param).__name__ not in ["str"]:
            raise ValueError(
                descr + " {} not supported, should be string type".format(param)
            )

    @staticmethod
    def check_empty(param, descr):
        """Raise ``ValueError`` if *param* is falsy."""
        if not param:
            raise ValueError(
                descr + " does not support empty value."
            )

    @staticmethod
    def check_positive_integer(param, descr):
        """Raise ``ValueError`` unless *param* is an ``int`` greater than zero."""
        if type(param).__name__ not in ["int", "long"] or param <= 0:
            raise ValueError(
                descr + " {} not supported, should be positive integer".format(param)
            )

    @staticmethod
    def check_positive_number(param, descr):
        """Raise ``ValueError`` unless *param* is an ``int``/``float`` greater than zero."""
        if type(param).__name__ not in ["float", "int", "long"] or param <= 0:
            raise ValueError(
                descr + " {} not supported, should be positive numeric".format(param)
            )

    @staticmethod
    def check_nonnegative_number(param, descr):
        """Raise ``ValueError`` unless *param* is an ``int``/``float`` that is >= 0."""
        if type(param).__name__ not in ["float", "int", "long"] or param < 0:
            raise ValueError(
                descr
                + " {} not supported, should be non-negative numeric".format(param)
            )

    @staticmethod
    def check_decimal_float(param, descr):
        """Raise ``ValueError`` unless *param* is an ``int``/``float`` within [0, 1]."""
        if type(param).__name__ not in ["float", "int"] or param < 0 or param > 1:
            raise ValueError(
                descr
                + " {} not supported, should be a float number in range [0, 1]".format(
                    param
                )
            )

    @staticmethod
    def check_boolean(param, descr):
        """Raise ``ValueError`` unless *param* is exactly of type ``bool``."""
        if type(param).__name__ != "bool":
            raise ValueError(
                descr + " {} not supported, should be bool type".format(param)
            )

    @staticmethod
    def check_open_unit_interval(param, descr):
        """Raise ``ValueError`` unless *param* is a ``float`` strictly between 0 and 1."""
        if type(param).__name__ not in ["float"] or param <= 0 or param >= 1:
            raise ValueError(
                descr + " should be a numeric number between 0 and 1 exclusively"
            )

    @staticmethod
    def check_valid_value(param, descr, valid_values):
        """Raise ``ValueError`` unless *param* is contained in *valid_values*."""
        if param not in valid_values:
            raise ValueError(
                descr
                + " {} is not supported, it should be in {}".format(param, valid_values)
            )

    @staticmethod
    def check_defined_type(param, descr, types):
        """Raise ``ValueError`` unless the type name of *param* is listed in *types*."""
        if type(param).__name__ not in types:
            raise ValueError(
                descr + " {} not supported, should be one of {}".format(param, types)
            )

    @staticmethod
    def check_and_change_lower(param, valid_list, descr=""):
        """Return *param* lower-cased if that value is in *valid_list*.

        Raises:
            ValueError: If *param* is not a ``str`` or its lower-cased form is
                not in *valid_list*.
        """
        if type(param).__name__ != "str":
            raise ValueError(
                descr
                + " {} not supported, should be one of {}".format(param, valid_list)
            )

        lower_param = param.lower()
        if lower_param in valid_list:
            return lower_param
        else:
            raise ValueError(
                descr
                + " {} not supported, should be one of {}".format(param, valid_list)
            )

    @staticmethod
    def _greater_equal_than(value, limit):
        # `settings.FLOAT_ZERO` is subtracted from the limit as a tolerance.
        return value >= limit - settings.FLOAT_ZERO

    @staticmethod
    def _less_equal_than(value, limit):
        # `settings.FLOAT_ZERO` is added to the limit as a tolerance.
        return value <= limit + settings.FLOAT_ZERO

    @staticmethod
    def _range(value, ranges):
        """Return True if *value* lies in any ``(left, right)`` pair of *ranges*.

        Both bounds are widened by ``settings.FLOAT_ZERO``.
        """
        in_range = False
        for left_limit, right_limit in ranges:
            if (
                left_limit - settings.FLOAT_ZERO
                <= value
                <= right_limit + settings.FLOAT_ZERO
            ):
                in_range = True
                break

        return in_range

    @staticmethod
    def _in(value, right_value_list):
        return value in right_value_list

    @staticmethod
    def _not_in(value, wrong_value_list):
        return value not in wrong_value_list

    def _warn_deprecated_param(self, param_name, descr):
        """Log a warning if the deprecated *param_name* was supplied."""
        if self._deprecated_params_set.get(param_name):
            logging.warning(
                f"{descr} {param_name} is deprecated and ignored in this version."
            )

    def _warn_to_deprecate_param(self, param_name, descr, new_param):
        """Log a warning if the soon-to-be-deprecated *param_name* was supplied.

        Returns:
            True if the warning was logged, False otherwise.
        """
        if self._deprecated_params_set.get(param_name):
            logging.warning(
                f"{descr} {param_name} will be deprecated in future release; "
                f"please use {new_param} instead."
            )
            return True
        return False
|
||||
|
||||
|
||||
class ComponentBase(ABC):
    """
    Base class of the agent components.

    A component is bound to a canvas (graph), carries an id and a parameter
    object; the parameter object stores the declared inputs and outputs.
    """
    component_name: str
    # Class-level limiter sized by MAX_CONCURRENT_CHATS (10 when the variable is unset).
    thread_limiter = trio.CapacityLimiter(int(os.environ.get('MAX_CONCURRENT_CHATS', 10)))
    # Captures `component_id@variable` or `sys.xxx` wrapped in one or more braces.
    variable_ref_patt = r"\{* *\{([a-zA-Z:0-9]+@[A-Za-z:0-9_.-]+|sys\.[a-z_]+)\} *\}*"

    def __str__(self):
        """
        {
            "component_name": "Begin",
            "params": {}
        }
        """
        template = """{{
            "component_name": "{}",
            "params": {}
        }}"""
        return template.format(self.component_name, self._param)

    def __init__(self, canvas, id, param: ComponentParamBase):
        """Bind the component to ``canvas`` and validate ``param`` via ``param.check()``."""
        from agent.canvas import Graph  # Local import to avoid cyclic dependency
        assert isinstance(canvas, Graph), "canvas must be an instance of Canvas"
        self._canvas = canvas
        self._id = id
        self._param = param
        self._param.check()

    def invoke(self, **kwargs) -> dict[str, Any]:
        """
        Run ``_invoke`` and return all outputs.

        Any exception is logged; it is turned either into the configured
        default value or into the ``_ERROR`` output. Debug inputs are cleared
        afterwards and the elapsed time is recorded in ``_elapsed_time``.
        """
        started_at = time.perf_counter()
        self.set_output("_created_time", started_at)
        try:
            self._invoke(**kwargs)
        except Exception as e:
            if not self.get_exception_default_value():
                self.set_output("_ERROR", str(e))
            else:
                self.set_exception_default_value()
            logging.exception(e)
        self._param.debug_inputs = {}
        self.set_output("_elapsed_time", time.perf_counter() - self.output("_created_time"))
        return self.output()

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
    def _invoke(self, **kwargs):
        """Component logic; subclasses must override."""
        raise NotImplementedError()

    def output(self, var_nm: str=None) -> Union[dict[str, Any], Any]:
        """Return one output value (``""`` when it has none) or, without a name, all of them."""
        declared = self._param.outputs
        if not var_nm:
            return {name: slot.get("value") for name, slot in declared.items()}
        return declared.get(var_nm, {}).get("value", "")

    def set_output(self, key: str, value: Any):
        """Store ``value`` under output ``key``, declaring the output on first use."""
        declared = self._param.outputs
        if key not in declared:
            declared[key] = {"value": None, "type": str(type(value))}
        declared[key]["value"] = value

    def error(self):
        """Return the value of the ``_ERROR`` output, or None when it is not set."""
        return self._param.outputs.get("_ERROR", {}).get("value")

    def reset(self, only_output=False):
        """Clear output values; unless ``only_output`` is set, also clear inputs and debug inputs."""
        for slot in self._param.outputs.values():
            slot["value"] = None
        if only_output:
            return
        for slot in self._param.inputs.values():
            slot["value"] = None
        self._param.debug_inputs = {}

    def get_input(self, key: str=None) -> Union[Any, dict[str, Any]]:
        """
        Return the stored value of input ``key``; without a key, resolve every
        input element from the parameters (following canvas references) and
        return the resulting name -> value mapping.
        """
        if key:
            return self._param.inputs.get(key, {}).get("value")

        resolved = {}
        for name, _ in self.get_input_elements().items():
            raw = self.get_param(name)
            if raw is None:
                continue
            is_reference = isinstance(raw, str) and self._canvas.is_reff(raw)
            if is_reference:
                self.set_input_value(name, self._canvas.get_variable_value(raw))
            else:
                self.set_input_value(name, raw)
            resolved[name] = self.get_input_value(name)
        return resolved

    def get_input_values(self) -> Union[Any, dict[str, Any]]:
        """Return the debug inputs when present, otherwise the stored value of each input element."""
        debug_inputs = self._param.debug_inputs
        if debug_inputs:
            return debug_inputs

        return {name: self.get_input_value(name) for name, _ in self.get_input_elements().items()}

    def get_input_elements_from_text(self, txt: str) -> dict[str, dict[str, str]]:
        """Describe every variable reference found in ``txt``, keyed by the reference expression."""
        elements = {}
        for found in re.finditer(self.variable_ref_patt, txt, flags=re.IGNORECASE|re.DOTALL):
            exp = found.group(1)
            if exp.find("@") > 0:
                cpn_id, var_nm = exp.split("@")
            else:
                cpn_id, var_nm = "", exp
            elements[exp] = {
                "name": (self._canvas.get_component_name(cpn_id) +f"@{var_nm}") if cpn_id else exp,
                "value": self._canvas.get_variable_value(exp),
                "_retrival": self._canvas.get_variable_value(f"{cpn_id}@_references") if cpn_id else None,
                "_cpn_id": cpn_id
            }
        return elements

    def get_input_elements(self) -> dict[str, Any]:
        """Return the input declarations held by the parameter object."""
        return self._param.inputs

    def get_input_form(self) -> dict[str, dict]:
        """Delegate to the parameter object's ``get_input_form``."""
        return self._param.get_input_form()

    def set_input_value(self, key: str, value: Any) -> None:
        """Store ``value`` under input ``key``, declaring the input on first use."""
        declared = self._param.inputs
        if key not in declared:
            declared[key] = {"value": None}
        declared[key]["value"] = value

    def get_input_value(self, key: str) -> Any:
        """Return the stored value of input ``key`` or None when the input is unknown."""
        declared = self._param.inputs
        if key in declared:
            return declared[key].get("value")
        return None

    def get_component_name(self, cpn_id) -> str:
        """Return the lower-cased component name of the canvas component ``cpn_id``."""
        component = self._canvas.get_component(cpn_id)
        return component["obj"].component_name.lower()

    def get_param(self, name):
        """Return attribute ``name`` of the parameter object, or None when it does not exist."""
        if not hasattr(self._param, name):
            return None
        return getattr(self._param, name)

    def debug(self, **kwargs):
        """Call ``_invoke`` directly and return its result."""
        return self._invoke(**kwargs)

    def get_parent(self) -> Union[object, None]:
        """Return the component object named by this component's ``parent_id``, if any."""
        parent_id = self._canvas.get_component(self._id).get("parent_id")
        if parent_id:
            return self._canvas.get_component(parent_id)["obj"]
        return None

    def get_upstream(self) -> List[str]:
        """Return the ``upstream`` entry of this component's canvas record."""
        return self._canvas.get_component(self._id)['upstream']

    def get_downstream(self) -> List[str]:
        """Return the ``downstream`` entry of this component's canvas record."""
        return self._canvas.get_component(self._id)['downstream']

    @staticmethod
    def string_format(content: str, kv: dict[str, str]) -> str:
        """Replace each ``{name}`` placeholder in ``content`` with ``str(value)`` (``""`` for None)."""
        for name, value in kv.items():
            placeholder = r"\{%s\}" % re.escape(name)
            # A callable replacement keeps backslashes of the value literal.
            content = re.sub(
                placeholder,
                lambda _match, val=value: "" if val is None else str(val),
                content
            )
        return content

    def exception_handler(self):
        """Return the configured ``goto``/``default_value`` pair, or None without an exception method."""
        if self._param.exception_method:
            return {
                "goto": self._param.exception_goto,
                "default_value": self._param.exception_default_value
            }
        return None

    def get_exception_default_value(self):
        """Return the configured default value when the exception method is "comment", else ``""``."""
        if self._param.exception_method == "comment":
            return self._param.exception_default_value
        return ""

    def set_exception_default_value(self):
        """Write the exception default value into the ``result`` output."""
        fallback = self.get_exception_default_value()
        self.set_output("result", fallback)

    def thoughts(self) -> str:
        """Progress text of the component; subclasses must override."""
        raise NotImplementedError()
|
||||
52
agent/component/begin.py
Normal file
52
agent/component/begin.py
Normal file
@@ -0,0 +1,52 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from agent.component.fillup import UserFillUpParam, UserFillUp
|
||||
|
||||
|
||||
class BeginParam(UserFillUpParam):
    """
    Parameters of the Begin component: the running mode and the prologue text.
    """

    def __init__(self):
        super().__init__()
        self.mode = "conversational"
        self.prologue = "Hi! I'm your smart assistant. What can I do for you?"

    def check(self):
        """Validate that ``mode`` is one of the supported modes."""
        allowed_modes = ["conversational", "task"]
        self.check_valid_value(self.mode, "The 'mode' should be either `conversational` or `task`", allowed_modes)

    def get_input_form(self) -> dict[str, dict]:
        """Return the declared inputs as the input form."""
        return self.inputs
|
||||
|
||||
|
||||
class Begin(UserFillUp):
    """Entry component: copies the submitted inputs into its outputs and input values."""
    component_name = "Begin"

    def _invoke(self, **kwargs):
        """
        Publish each entry of ``kwargs["inputs"]`` as an output and an input value.

        Dict entries whose "type" contains "file" are resolved through
        ``self._canvas.get_files`` (or set to None when optional and empty);
        other dict entries contribute their "value". Entries that are not
        dicts are passed through unchanged instead of failing on ``.get``.
        """
        for k, v in kwargs.get("inputs", {}).items():
            if isinstance(v, dict):
                # "type" may be missing or None; normalise before the substring test.
                kind = str(v.get("type") or "").lower()
                if "file" in kind:
                    if v.get("optional") and v.get("value", None) is None:
                        v = None
                    else:
                        v = self._canvas.get_files([v["value"]])
                else:
                    v = v.get("value")
            self.set_output(k, v)
            self.set_input_value(k, v)

    def thoughts(self) -> str:
        """The Begin component reports no progress text."""
        return ""
|
||||
137
agent/component/categorize.py
Normal file
137
agent/component/categorize.py
Normal file
@@ -0,0 +1,137 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from abc import ABC
|
||||
|
||||
from api.db import LLMType
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from agent.component.llm import LLMParam, LLM
|
||||
from api.utils.api_utils import timeout
|
||||
from rag.llm.chat_model import ERROR_PREFIX
|
||||
|
||||
|
||||
class CategorizeParam(LLMParam):
    """
    Parameters of the Categorize component: the category table, the query
    reference and the history window, plus the system prompt derived from them.
    """

    def __init__(self):
        super().__init__()
        self.category_description = {}
        self.query = "sys.query"
        self.message_history_window_size = 1
        self.update_prompt()

    def check(self):
        """Validate the window size and that every category has a name and a "to" target."""
        self.check_positive_integer(self.message_history_window_size, "[Categorize] Message window size > 0")
        self.check_empty(self.category_description, "[Categorize] Category examples")
        for name, spec in self.category_description.items():
            if not name:
                raise ValueError("[Categorize] Category name can not be empty!")
            if not spec.get("to"):
                raise ValueError(f"[Categorize] 'To' of category {name} can not be empty!")

    def get_input_form(self) -> dict[str, dict]:
        """Return the single-field form asking for the query."""
        query_field = {
            "type": "line",
            "name": "Query"
        }
        return {"query": query_field}

    def update_prompt(self):
        """Rebuild ``sys_prompt`` from the category names, descriptions and examples."""
        categories = self.category_description

        # One "USER: ... → category" line per non-empty example.
        cate_lines = [
            "USER: \"" + re.sub(r"\n", " ", line, flags=re.DOTALL) + "\" → "+name
            for name, spec in categories.items()
            for line in spec.get("examples", [])
            if line
        ]

        descriptions = [
            "\n------\nCategory: {}\nDescription: {}".format(name, spec["description"])
            for name, spec in categories.items()
            if spec.get("description")
        ]

        template = """
You are an advanced classification system that categorizes user questions into specific types. Analyze the input question and classify it into ONE of the following categories:
{}

Here's description of each category:
- {}

---- Instructions ----
- Consider both explicit mentions and implied context
- Prioritize the most specific applicable category
- Return only the category name without explanations
- Use "Other" only when no other category fits

"""
        self.sys_prompt = template.format(
            "\n - ".join(list(categories.keys())),
            "\n".join(descriptions)
        )

        if not cate_lines:
            return
        self.sys_prompt += """
---- Examples ----
{}
""".format("\n".join(cate_lines))
|
||||
|
||||
|
||||
class Categorize(LLM, ABC):
    """Asks the chat model to classify the query and routes to the matching category's targets."""
    component_name = "Categorize"

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
    def _invoke(self, **kwargs):
        """
        Classify the current query and set the ``category_name`` and ``_next`` outputs.

        The query comes from ``kwargs["sys.query"]`` when given, otherwise from
        the variable named by ``self._param.query``. The category whose name
        occurs most often in the model answer wins; when no name occurs at
        all, the LAST configured category is used for both outputs.

        :raises Exception: when the model answer contains ``ERROR_PREFIX``.
        """
        msg = self._canvas.get_history(self._param.message_history_window_size)
        if not msg:
            msg = [{"role": "user", "content": ""}]
        if kwargs.get("sys.query"):
            msg[-1]["content"] = kwargs["sys.query"]
            self.set_input_value("sys.query", kwargs["sys.query"])
        else:
            msg[-1]["content"] = self._canvas.get_variable_value(self._param.query)
            self.set_input_value(self._param.query, msg[-1]["content"])
        self._param.update_prompt()
        chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)

        user_prompt = """
---- Real Data ----
{} →
""".format(" | ".join(["{}: \"{}\"".format(c["role"].upper(), re.sub(r"\n", "", c["content"], flags=re.DOTALL)) for c in msg]))
        ans = chat_mdl.chat(self._param.sys_prompt, [{"role": "user", "content": user_prompt}], self._param.gen_conf())
        logging.info(f"input: {user_prompt}, answer: {str(ans)}")
        if ERROR_PREFIX in ans:
            raise Exception(ans)

        # Count the number of times each category appears in the answer.
        categories = self._param.category_description
        answer = ans.lower()
        category_counts = {c: answer.count(c.lower()) for c in categories}

        # Fallback when nothing matched: the last category. The previous code
        # routed to the last category's targets but reported the first
        # category's name, so the two outputs disagreed.
        max_category = list(categories.keys())[-1]
        if any(category_counts.values()):
            max_category = max(category_counts.items(), key=lambda x: x[1])[0]
        cpn_ids = categories[max_category]["to"]

        self.set_output("category_name", max_category)
        self.set_output("_next", cpn_ids)

    def thoughts(self) -> str:
        """Return a progress text listing the candidate categories."""
        return "Which should it falls into {}? ...".format(",".join([f"`{c}`" for c, _ in self._param.category_description.items()]))
|
||||
40
agent/component/fillup.py
Normal file
40
agent/component/fillup.py
Normal file
@@ -0,0 +1,40 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from agent.component.base import ComponentBase, ComponentParamBase
|
||||
|
||||
|
||||
class UserFillUpParam(ComponentParamBase):
    """Parameters of the UserFillUp component: whether tips are shown and their text."""

    def __init__(self):
        super().__init__()
        self.enable_tips = True
        self.tips = "Please fill up the form"

    def check(self) -> bool:
        """No constraints to verify; always reports success."""
        is_valid = True
        return is_valid
|
||||
|
||||
|
||||
class UserFillUp(ComponentBase):
    """Component that publishes the values submitted by the user as outputs."""
    component_name = "UserFillUp"

    def _invoke(self, **kwargs):
        """Copy every entry of ``kwargs["inputs"]`` into the outputs."""
        submitted = kwargs.get("inputs", {})
        for name, value in submitted.items():
            self.set_output(name, value)

    def thoughts(self) -> str:
        """Return the progress text shown for this component."""
        return "Waiting for your input..."
|
||||
|
||||
|
||||
135
agent/component/invoke.py
Normal file
135
agent/component/invoke.py
Normal file
@@ -0,0 +1,135 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from abc import ABC
|
||||
|
||||
import requests
|
||||
|
||||
from agent.component.base import ComponentBase, ComponentParamBase
|
||||
from api.utils.api_utils import timeout
|
||||
from deepdoc.parser import HtmlParser
|
||||
|
||||
|
||||
class InvokeParam(ComponentParamBase):
    """
    Define the Invoke (HTTP request) component parameters.
    """

    def __init__(self):
        super().__init__()
        self.proxy = None
        self.headers = ""
        self.method = "get"
        self.variables = []
        self.url = ""
        self.timeout = 60
        self.clean_html = False
        self.datatype = "json"  # New parameter to determine data posting type

    def check(self):
        """Validate method, URL, timeout, the clean-HTML flag and the data post type, in that order."""
        allowed_methods = ["get", "post", "put"]
        self.check_valid_value(self.method.lower(), "Type of content from the crawler", allowed_methods)
        self.check_empty(self.url, "End point URL")
        self.check_positive_integer(self.timeout, "Timeout time in second")
        self.check_boolean(self.clean_html, "Clean HTML")
        # Check for valid datapost value
        allowed_datatypes = ["json", "formdata"]
        self.check_valid_value(self.datatype.lower(), "Data post type", allowed_datatypes)
|
||||
|
||||
|
||||
class Invoke(ComponentBase, ABC):
    """Component that issues an HTTP GET/PUT/POST request and stores the response body."""
    component_name = "Invoke"

    # NOTE(review): the fallback timeout here is 3 while other components use 10*60 — confirm intent.
    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 3)))
    def _invoke(self, **kwargs):
        """
        Send the configured request, retrying up to ``max_retries`` extra times.

        Request arguments are built from ``self._param.variables`` (literal
        "value" or canvas reference "ref"); ``{name}`` placeholders in the URL
        are replaced by canvas variables. On success the body (optionally
        cleaned by ``HtmlParser``) is stored in the ``result`` output and
        returned. When every attempt fails, ``_ERROR`` is set and an error
        message string is returned.
        """
        args = {}
        for para in self._param.variables:
            if para.get("value"):
                args[para["key"]] = para["value"]
            else:
                args[para["key"]] = self._canvas.get_variable_value(para["ref"])

        url = self._param.url.strip()

        def replace_variable(match):
            var_name = match.group(1)
            try:
                value = self._canvas.get_variable_value(var_name)
                return str(value or "")
            except Exception:
                return ""

        # {base_url} or {component_id@variable_name}
        url = re.sub(r"\{([a-zA-Z_][a-zA-Z0-9_.@-]*)\}", replace_variable, url)

        if not url.startswith("http"):
            url = "http://" + url

        method = self._param.method.lower()
        headers = {}
        if self._param.headers:
            headers = json.loads(self._param.headers)
        proxies = None
        # The proxy parameter defaults to None; re.sub would raise TypeError on it.
        proxy = self._param.proxy or ""
        if re.sub(r"https?:?/?/?", "", proxy):
            proxies = {"http": proxy, "https": proxy}

        last_e = ""
        for _ in range(self._param.max_retries + 1):
            try:
                response = self._send_request(method, url, args, headers, proxies)
                if response is not None:
                    self._store_response(response)
                return self.output("result")
            except Exception as e:
                last_e = e
                logging.exception(f"Http request error: {e}")
                time.sleep(self._param.delay_after_error)

        if last_e:
            self.set_output("_ERROR", str(last_e))
            return f"Http request error: {last_e}"

        # Only reachable when the retry loop did not run at all.
        assert False, self.output()

    def _send_request(self, method, url, args, headers, proxies):
        """Issue the request for ``method``; returns None for an unsupported method."""
        common = {"url": url, "headers": headers, "proxies": proxies, "timeout": self._param.timeout}
        if method == "get":
            return requests.get(params=args, **common)
        if method in ("put", "post"):
            # JSON body or form-encoded body, depending on the datatype parameter.
            payload_kw = "json" if self._param.datatype.lower() == "json" else "data"
            send = requests.put if method == "put" else requests.post
            return send(**{payload_kw: args}, **common)
        return None

    def _store_response(self, response):
        """Write the response body, HTML-cleaned when requested, into the ``result`` output."""
        if self._param.clean_html:
            # Previously missing in the POST branch, which used `sections` undefined.
            sections = HtmlParser()(None, response.content)
            self.set_output("result", "\n".join(sections))
        else:
            self.set_output("result", response.text)

    def thoughts(self) -> str:
        """Return the progress text shown for this component."""
        return "Waiting for the server respond..."
|
||||
60
agent/component/iteration.py
Normal file
60
agent/component/iteration.py
Normal file
@@ -0,0 +1,60 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from abc import ABC
|
||||
from agent.component.base import ComponentBase, ComponentParamBase
|
||||
|
||||
|
||||
class IterationParam(ComponentParamBase):
    """
    Parameters of the Iteration component: the reference to the items to loop over.
    """

    def __init__(self):
        super().__init__()
        self.items_ref = ""

    def get_input_form(self) -> dict[str, dict]:
        """Return the single-field form asking for the items as JSON."""
        items_field = {
            "type": "json",
            "name": "Items"
        }
        return {"items": items_field}

    def check(self):
        """No constraints to verify; always reports success."""
        is_valid = True
        return is_valid
|
||||
|
||||
|
||||
class Iteration(ComponentBase, ABC):
    """Container component that loops over the list referenced by ``items_ref``."""
    component_name = "Iteration"

    def get_start(self):
        """Return the id of the ``IterationItem`` whose parent is this component, if any."""
        for cid in self._canvas.components.keys():
            is_item = self._canvas.get_component(cid)["obj"].component_name.lower() == "iterationitem"
            if is_item and self._canvas.get_component(cid)["parent_id"] == self._id:
                return cid
        return None

    def _invoke(self, **kwargs):
        """Set the ``_ERROR`` output when the referenced value is not a list."""
        arr = self._canvas.get_variable_value(self._param.items_ref)
        if isinstance(arr, list):
            return
        self.set_output("_ERROR", self._param.items_ref + " must be an array, but its type is "+str(type(arr)))

    def thoughts(self) -> str:
        """Return a progress text stating how many items are referenced."""
        items = self._canvas.get_variable_value(self._param.items_ref)
        return "Need to process {} items.".format(len(items))
|
||||
|
||||
|
||||
|
||||
83
agent/component/iterationitem.py
Normal file
83
agent/component/iterationitem.py
Normal file
@@ -0,0 +1,83 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from abc import ABC
|
||||
from agent.component.base import ComponentBase, ComponentParamBase
|
||||
|
||||
|
||||
class IterationItemParam(ComponentParamBase):
    """
    Parameters of the IterationItem component; nothing beyond the base parameters.
    """

    def check(self):
        """No constraints to verify; always reports success."""
        is_valid = True
        return is_valid
|
||||
|
||||
|
||||
class IterationItem(ComponentBase, ABC):
    """Cursor of an Iteration: yields one item of the parent's list per invocation."""
    component_name = "IterationItem"

    # Component types whose outputs are never collated into the parent.
    _NON_COLLATED = ("categorize", "message", "switch", "userfillup", "iterationitem")

    def __init__(self, canvas, id, param: ComponentParamBase):
        super().__init__(canvas, id, param)
        # Index of the next item to emit; -1 marks the end of the iteration.
        self._idx = 0

    def _invoke(self, **kwargs):
        """
        Emit the next item of the parent's list as the ``item``/``index`` outputs.

        From the second round on, the outputs of the previous round are first
        collated into the parent. When the list is exhausted ``_idx`` becomes -1.

        :raises Exception: when the parent's ``items_ref`` is not a list.
        """
        parent = self.get_parent()
        arr = self._canvas.get_variable_value(parent._param.items_ref)
        if not isinstance(arr, list):
            self._idx = -1
            raise Exception(parent._param.items_ref + " must be an array, but its type is "+str(type(arr)))

        if self._idx > 0:
            self.output_collation()

        if self._idx >= len(arr):
            self._idx = -1
            return

        self.set_output("item", arr[self._idx])
        self.set_output("index", self._idx)

        self._idx += 1

    def output_collation(self):
        """
        Append the outputs of sibling components to the parent's outputs.

        For every component sharing this item's parent, each parent output
        carrying a ``ref`` of the form ``<component_id>@<var>`` that points at
        that component gets the component's current ``var`` appended.
        """
        pid = self.get_parent()._id
        for cid in self._canvas.components.keys():
            obj = self._canvas.get_component_obj(cid)
            p = obj.get_parent()
            if not p or p._id != pid:
                continue

            # Filter on the child component. The previous code tested the parent
            # (always the Iteration itself) and misspelled "iterationitem", so
            # the filter could never match.
            if obj.component_name.lower() in self._NON_COLLATED:
                continue

            for k, o in p._param.outputs.items():
                if "ref" not in o:
                    continue
                _cid, var = o["ref"].split("@")
                if _cid != cid:
                    continue
                res = p.output(k)
                if not res:
                    res = []
                res.append(obj.output(var))
                p.set_output(k, res)

    def end(self):
        """Return True once the iteration has finished."""
        return self._idx == -1

    def thoughts(self) -> str:
        """Return the progress text shown for this component."""
        return "Next turn..."
|
||||
286
agent/component/llm.py
Normal file
286
agent/component/llm.py
Normal file
@@ -0,0 +1,286 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from copy import deepcopy
|
||||
from typing import Any, Generator
|
||||
import json_repair
|
||||
from functools import partial
|
||||
from api.db import LLMType
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api.db.services.tenant_llm_service import TenantLLMService
|
||||
from agent.component.base import ComponentBase, ComponentParamBase
|
||||
from api.utils.api_utils import timeout
|
||||
from rag.prompts.generator import tool_call_summary, message_fit_in, citation_prompt
|
||||
|
||||
|
||||
class LLMParam(ComponentParamBase):
    """
    Parameters of the LLM component: model id, prompts and sampling settings.
    """

    def __init__(self):
        super().__init__()
        self.llm_id = ""
        self.sys_prompt = ""
        self.prompts = [{"role": "user", "content": "{sys.query}"}]
        self.max_tokens = 0
        self.temperature = 0
        self.top_p = 0
        self.presence_penalty = 0
        self.frequency_penalty = 0
        self.output_structure = None
        self.cite = True
        self.visual_files_var = None

    def check(self):
        """Validate the sampling settings and that model id and prompts are not empty."""
        for value, label in (
            (self.temperature, "[Agent] Temperature"),
            (self.presence_penalty, "[Agent] Presence penalty"),
            (self.frequency_penalty, "[Agent] Frequency penalty"),
        ):
            self.check_decimal_float(float(value), label)
        self.check_nonnegative_number(int(self.max_tokens), "[Agent] Max tokens")
        self.check_decimal_float(float(self.top_p), "[Agent] Top P")
        for value, label in (
            (self.llm_id, "[Agent] LLM"),
            (self.sys_prompt, "[Agent] System prompt"),
            (self.prompts, "[Agent] User prompt"),
        ):
            self.check_empty(value, label)

    def gen_conf(self):
        """
        Build the generation config.

        A setting is included only when its value is positive and its
        ``...Enabled`` switch attribute exists and is truthy.
        """
        def switch(flag_name):
            # A missing switch attribute counts as "off".
            try:
                return getattr(self, flag_name)
            except Exception:
                return None

        settings = (
            ("max_tokens", int, "maxTokensEnabled"),
            ("temperature", float, "temperatureEnabled"),
            ("top_p", float, "topPEnabled"),
            ("presence_penalty", float, "presencePenaltyEnabled"),
            ("frequency_penalty", float, "frequencyPenaltyEnabled"),
        )
        conf = {}
        for key, cast, flag_name in settings:
            if cast(getattr(self, key)) > 0 and switch(flag_name):
                conf[key] = cast(getattr(self, key))
        return conf
|
||||
|
||||
|
||||
class LLM(ComponentBase):
|
||||
component_name = "LLM"
|
||||
|
||||
def __init__(self, canvas, component_id, param: ComponentParamBase):
    """Initialise the base component, create the chat model bundle and reset the image list."""
    super().__init__(canvas, component_id, param)
    tenant_id = self._canvas.get_tenant_id()
    llm_type = TenantLLMService.llm_id2llm_type(self._param.llm_id)
    self.chat_mdl = LLMBundle(
        tenant_id,
        llm_type,
        self._param.llm_id,
        max_retries=self._param.max_retries,
        retry_interval=self._param.delay_after_error,
    )
    self.imgs = []
|
||||
|
||||
def get_input_form(self) -> dict[str, dict]:
    """Return one "line" form field per input element, labelled with the element's name."""
    return {
        key: {"type": "line", "name": element["name"]}
        for key, element in self.get_input_elements().items()
    }
|
||||
|
||||
def get_input_elements(self) -> dict[str, Any]:
    """
    Collect the variable references used in the system prompt and in every prompt.

    A plain-string ``prompts`` parameter is first normalised, in place, to a
    one-element list holding a user message.
    """
    param = self._param
    elements = self.get_input_elements_from_text(param.sys_prompt)
    if isinstance(param.prompts, str):
        param.prompts = [{"role": "user", "content": param.prompts}]
    for prompt in param.prompts:
        elements.update(self.get_input_elements_from_text(prompt["content"]))
    return elements
|
||||
|
||||
def set_debug_inputs(self, inputs: dict[str, dict]):
    """Store ``inputs`` as the debug inputs of the parameter object."""
    param = self._param
    param.debug_inputs = inputs
|
||||
|
||||
def add2system_prompt(self, txt):
    """Append ``txt`` to the system prompt of the parameter object."""
    param = self._param
    param.sys_prompt += txt
|
||||
|
||||
def _sys_prompt_and_msg(self, msg, args):
    """
    Append the formatted prompts to ``msg`` and format the system prompt.

    A prompt is skipped when its role equals the role of the current last
    message. The stored prompts are not modified; copies are appended.

    :return: ``(msg, formatted_system_prompt)``; ``msg`` is extended in place.
    """
    param = self._param
    if isinstance(param.prompts, str):
        param.prompts = [{"role": "user", "content": param.prompts}]
    for prompt in param.prompts:
        same_role_as_last = bool(msg) and msg[-1]["role"] == prompt["role"]
        if same_role_as_last:
            continue
        filled = deepcopy(prompt)
        filled["content"] = self.string_format(filled["content"], args)
        msg.append(filled)
    return msg, self.string_format(param.sys_prompt, args)
|
||||
|
||||
def _prepare_prompt_variables(self):
    """
    Resolve images and prompt variables and build the prompts for a chat call.

    :return: ``(sys_prompt, msg, user_defined_prompt)``.
    """
    param = self._param
    if param.visual_files_var:
        candidates = self._canvas.get_variable_value(param.visual_files_var)
        if not candidates:
            candidates = []
        # Keep only entries that start with the "data:image/" prefix.
        prefix = "data:image/"
        self.imgs = [img for img in candidates if img[:len(prefix)] == prefix]
        if self.imgs and TenantLLMService.llm_id2llm_type(param.llm_id) == LLMType.CHAT.value:
            self.chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT.value,
                                      param.llm_id, max_retries=param.max_retries,
                                      retry_interval=param.delay_after_error
                                      )

    # Debug inputs, when present, take the place of the resolved input elements.
    elements = param.debug_inputs if param.debug_inputs else self.get_input_elements()
    args = {}
    for name, element in elements.items():
        value = element["value"]
        if not isinstance(value, str):
            try:
                value = json.dumps(value, ensure_ascii=False)
            except Exception:
                value = str(value)
        args[name] = value
        self.set_input_value(name, value)

    history = self._canvas.get_history(param.message_history_window_size)[:-1]
    msg, sys_prompt = self._sys_prompt_and_msg(history, args)
    user_defined_prompt, sys_prompt = self._extract_prompts(sys_prompt)
    if param.cite and self._canvas.get_reference()["chunks"]:
        sys_prompt += citation_prompt(user_defined_prompt)

    return sys_prompt, msg, user_defined_prompt
|
||||
|
||||
def _extract_prompts(self, sys_prompt):
    """
    Pull the user-defined ``<TAG>...</TAG>`` sections out of ``sys_prompt``.

    :return: ``(sections, remaining_prompt)`` where ``sections`` maps the
        lower-cased tag to the text of its first occurrence and the remaining
        prompt has every occurrence of the found tags removed.
    """
    tags = ("TASK_ANALYSIS", "PLAN_GENERATION", "REFLECTION", "CONTEXT_SUMMARY", "CONTEXT_RANKING", "CITATION_GUIDELINES")
    sections = {}
    for tag in tags:
        section_patt = re.compile(rf"<{tag}>(.*?)</{tag}>", flags=re.DOTALL|re.IGNORECASE)
        found = section_patt.search(sys_prompt)
        if found:
            sections[tag.lower()] = found.group(1)
            sys_prompt = section_patt.sub("", sys_prompt)
    return sections, sys_prompt
|
||||
|
||||
def _generate(self, msg:list[dict], **kwargs) -> str:
    """
    Run one blocking chat call.

    ``msg[0]`` supplies the system prompt and the rest is the history; the
    collected images are passed along only when there are any.
    """
    system, history = msg[0]["content"], msg[1:]
    conf = self._param.gen_conf()
    if self.imgs:
        return self.chat_mdl.chat(system, history, conf, images=self.imgs, **kwargs)
    return self.chat_mdl.chat(system, history, conf, **kwargs)
|
||||
|
||||
def _generate_streamly(self, msg:list[dict], **kwargs) -> Generator[str, None, None]:
    """
    Stream a chat call, yielding one text delta per chunk of the model stream.

    Each chunk from ``chat_streamly`` is treated as the cumulative answer so
    far; ``delta`` turns it into the newly added text. ``<think>`` and
    ``</think>`` markers are yielded as separate deltas and stripped from
    ordinary deltas.

    :param msg: ``msg[0]`` supplies the system prompt, the rest is the history.
    """
    ans = ""
    last_idx = 0
    endswith_think = False

    def delta(txt):
        nonlocal ans, last_idx, endswith_think
        delta_ans = txt[last_idx:]
        ans = txt

        think_pos = delta_ans.find("<think>")
        if think_pos == 0:
            last_idx += len("<think>")
            return "<think>"
        elif think_pos > 0:
            # Emit only the text before the marker and advance by exactly that
            # much. The previous code searched the already truncated text,
            # got -1 and moved the cursor backwards.
            last_idx += think_pos
            return delta_ans[:think_pos]
        elif delta_ans.endswith("</think>"):
            endswith_think = True
        elif endswith_think:
            endswith_think = False
            return "</think>"

        last_idx = len(ans)
        if ans.endswith("</think>"):
            # Keep the closing marker unread so that it is emitted on its own later.
            last_idx -= len("</think>")
        return re.sub(r"(<think>|</think>)", "", delta_ans)

    if not self.imgs:
        for txt in self.chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), **kwargs):
            yield delta(txt)
    else:
        for txt in self.chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), images=self.imgs, **kwargs):
            yield delta(txt)
|
||||
|
||||
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
def _invoke(self, **kwargs):
    """Run the LLM and publish its answer through ``set_output``.

    Three paths, in order:

    1. ``output_structure`` is configured: ask for JSON, retrying up to
       ``max_retries`` extra times, and store the parsed value under
       ``"structured_content"`` (or the last error under ``"_ERROR"``).
    2. A downstream component is named "message", no structure is requested
       and the exception handler has no ``"goto"``: store a ``partial`` of
       ``_stream_output`` under ``"content"`` instead of calling the model here.
    3. Otherwise: call the model (with the same retry budget) and store the
       answer under ``"content"``; on failure store the exception default
       value if there is one, else the error under ``"_ERROR"``.
    """
    def clean_formated_answer(ans: str) -> str:
        # Drop everything up to the last "</think>", then everything up to the
        # last "```json" fence, then a trailing "```" fence (plus newlines).
        ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
        ans = re.sub(r"^.*```json", "", ans, flags=re.DOTALL)
        return re.sub(r"```\n*$", "", ans, flags=re.DOTALL)

    prompt, msg, _ = self._prepare_prompt_variables()
    error: str = ""

    if self._param.output_structure:
        prompt += "\nThe output MUST follow this JSON format:\n"+json.dumps(self._param.output_structure, ensure_ascii=False, indent=2)
        prompt += "\nRedundant information is FORBIDDEN."
        for _ in range(self._param.max_retries+1):
            # NOTE(review): message_fit_in presumably trims the conversation to
            # 97% of the model's max length — confirm against its definition.
            _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
            error = ""
            ans = self._generate(msg)
            # Remove the first entry again; the system message is re-prepended
            # at the top of every attempt.
            msg.pop(0)
            if ans.find("**ERROR**") >= 0:
                logging.error(f"LLM response error: {ans}")
                error = ans
                continue
            try:
                self.set_output("structured_content", json_repair.loads(clean_formated_answer(ans)))
                return
            except Exception:
                # Tell the model about the parse failure so the next attempt
                # sees it in the conversation.
                msg.append({"role": "user", "content": "The answer can't not be parsed as JSON"})
                error = "The answer can't not be parsed as JSON"
        if error:
            self.set_output("_ERROR", error)
        return

    downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
    ex = self.exception_handler()
    if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not self._param.output_structure and not (ex and ex["goto"]):
        # Defer generation: the consumer of "content" calls the partial to stream.
        self.set_output("content", partial(self._stream_output, prompt, msg))
        return

    for _ in range(self._param.max_retries+1):
        _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
        error = ""
        ans = self._generate(msg)
        msg.pop(0)
        if ans.find("**ERROR**") >= 0:
            logging.error(f"LLM response error: {ans}")
            error = ans
            continue
        self.set_output("content", ans)
        break

    if error:
        # Only reached with a non-empty error when the last attempt failed.
        if self.get_exception_default_value():
            self.set_output("content", self.get_exception_default_value())
        else:
            self.set_output("_ERROR", error)
|
||||
|
||||
def _stream_output(self, prompt, msg):
    """Stream the answer piece by piece and record the full text afterwards.

    Every piece from ``_generate_streamly`` is yielded as-is. A piece that
    contains ``**ERROR**`` ends the stream: the exception default value is
    stored under ``"content"`` and yielded if one is configured, otherwise the
    piece is stored under ``"_ERROR"``. When the stream finishes normally the
    concatenated text is stored under ``"content"``.

    Args:
        prompt: System prompt placed in front of ``msg``.
        msg: Conversation messages without the system entry.

    Yields:
        The streamed text pieces (or the exception default value on error).
    """
    budget = int(self.chat_mdl.max_length * 0.97)
    _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], budget)
    pieces = []
    for piece in self._generate_streamly(msg):
        if "**ERROR**" in piece:
            if self.get_exception_default_value():
                self.set_output("content", self.get_exception_default_value())
                yield self.get_exception_default_value()
            else:
                self.set_output("_ERROR", piece)
            return
        yield piece
        pieces.append(piece)
    self.set_output("content", "".join(pieces))
|
||||
|
||||
def add_memory(self, user:str, assist:str, func_name: str, params: dict, results: str, user_defined_prompt:dict|None=None):
    """Summarise a tool call and append the summary to the canvas memory.

    Args:
        user: User-side text, passed through to ``self._canvas.add_memory``.
        assist: Assistant-side text, passed through to ``self._canvas.add_memory``.
        func_name: Name of the tool that was called.
        params: Arguments the tool was called with.
        results: What the tool returned.
        user_defined_prompt: Optional prompt overrides forwarded to
            ``tool_call_summary``; an empty dict is used when omitted.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if user_defined_prompt is None:
        user_defined_prompt = {}
    summ = tool_call_summary(self.chat_mdl, func_name, params, results, user_defined_prompt)
    logging.info(f"[MEMORY]: {summ}")
    self._canvas.add_memory(user, assist, summ)
|
||||
|
||||
def thoughts(self) -> str:
    """Return a short progress note quoting the latest message.

    The quoted text is the content of the last prepared message with the
    ``User's query:`` label and any backslashes removed.
    """
    _, messages, _ = self._prepare_prompt_variables()
    latest = messages[-1]['content']
    cleaned = re.sub(r"(User's query:|[\\]+)", '', latest, flags=re.DOTALL)
    intro = "⌛Give me a moment—starting from: \n\n"
    outro = "\n\nI’ll figure out our best next move."
    return intro + cleaned + outro
|
||||
150
agent/component/message.py
Normal file
150
agent/component/message.py
Normal file
@@ -0,0 +1,150 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
from functools import partial
|
||||
from typing import Any
|
||||
|
||||
from agent.component.base import ComponentBase, ComponentParamBase
|
||||
from jinja2 import Template as Jinja2Template
|
||||
|
||||
from api.utils.api_utils import timeout
|
||||
|
||||
|
||||
class MessageParam(ComponentParamBase):
    """
    Parameters of the Message component.
    """

    def __init__(self):
        super().__init__()
        # Candidate message templates; Message._invoke picks one at random.
        self.content = []
        # Whether the message may be produced as a stream.
        self.stream = True
        self.outputs = {"content": {"type": "str"}}

    def check(self):
        """Validate the parameters: content must be non-empty, stream a boolean."""
        self.check_empty(self.content, "[Message] Content")
        self.check_boolean(self.stream, "[Message] stream")
        return True
|
||||
|
||||
|
||||
class Message(ComponentBase):
    """Component that turns a message template into the ``content`` output.

    Plain templates with variable references can be streamed; templates that
    look like Jinja2 are rendered in one go.
    """
    component_name = "Message"

    def get_kwargs(self, script:str, kwargs:dict | None = None, delimiter:str | None = None) -> tuple[str, dict[str, str | list | Any]]:
        """Resolve the variables referenced in ``script`` to text values.

        Args:
            script: Template text containing ``{name}`` references.
            kwargs: Values already known; names present here are not resolved
                again. The dict is updated in place with the resolved values.
            delimiter: When given, list values are joined with it instead of
                being JSON-encoded.

        Returns:
            ``(script, values)`` where each ``{name}`` reference in the script
            has been replaced by a sanitised name (``@``, ``:`` and ``.``
            become ``_``) and ``values`` is keyed by those sanitised names.
        """
        # A fresh dict per call: this method writes into it, so a shared
        # default would leak values between unrelated calls.
        if kwargs is None:
            kwargs = {}
        for k, v in self.get_input_elements_from_text(script).items():
            if k in kwargs:
                continue
            v = v["value"]
            if not v:
                v = ""
            ans = ""
            if isinstance(v, partial):
                # A deferred (streaming) value: drain it into one string.
                for t in v():
                    ans += t
            elif isinstance(v, list) and delimiter:
                ans = delimiter.join([str(vv) for vv in v])
            elif not isinstance(v, str):
                try:
                    ans = json.dumps(v, ensure_ascii=False)
                except Exception:
                    # Not JSON-serialisable: fall back to the empty string.
                    pass
            else:
                ans = v
            if not ans:
                ans = ""
            kwargs[k] = ans
            self.set_input_value(k, ans)

        _kwargs = {}
        for n, v in kwargs.items():
            _n = re.sub("[@:.]", "_", n)
            script = re.sub(r"\{%s\}" % re.escape(n), _n, script)
            _kwargs[_n] = v
        return script, _kwargs

    def _stream(self, rand_cnt:str):
        """Yield the template piece by piece, resolving references on the way.

        Literal text between references is yielded unchanged. A reference that
        resolves to a ``partial`` is called and its pieces are forwarded as
        they arrive; other values are yielded as text (non-strings are
        JSON-encoded, or ``str()``-ed if that fails). The complete text is
        stored under the ``"content"`` output once the template is exhausted.
        """
        s = 0
        all_content = ""
        cache = {}
        for r in re.finditer(self.variable_ref_patt, rand_cnt, flags=re.DOTALL):
            all_content += rand_cnt[s: r.start()]
            yield rand_cnt[s: r.start()]
            s = r.end()
            exp = r.group(1)
            if exp in cache:
                yield cache[exp]
                all_content += cache[exp]
                continue

            v = self._canvas.get_variable_value(exp)
            if not v:
                v = ""
            if isinstance(v, partial):
                cnt = ""
                for t in v():
                    all_content += t
                    cnt += t
                    yield t
                # Remember the streamed text so a repeated reference reuses it
                # instead of resolving (and possibly re-running) the value.
                cache[exp] = cnt
                continue
            elif not isinstance(v, str):
                try:
                    v = json.dumps(v, ensure_ascii=False, indent=2)
                except Exception:
                    v = str(v)
            yield v
            all_content += v
            cache[exp] = v

        if s < len(rand_cnt):
            all_content += rand_cnt[s: ]
            yield rand_cnt[s: ]

        self.set_output("content", all_content)

    def _is_jinjia2(self, content:str) -> bool:
        """Return True when ``content`` contains ``{% ... %}``, ``{{`` or ``}}``."""
        patt = [
            r"\{%.*%\}", "{{", "}}"
        ]
        return any(re.search(p, content) for p in patt)

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
    def _invoke(self, **kwargs):
        """Pick one configured template at random and produce the message.

        When streaming is enabled and the template is not Jinja2, a ``partial``
        of ``_stream`` is stored under ``"content"``. Otherwise the template
        is rendered with Jinja2 and the final text is stored under ``"content"``.
        """
        rand_cnt = random.choice(self._param.content)
        if self._param.stream and not self._is_jinjia2(rand_cnt):
            self.set_output("content", partial(self._stream, rand_cnt))
            return

        rand_cnt, kwargs = self.get_kwargs(rand_cnt, kwargs)
        template = Jinja2Template(rand_cnt)
        try:
            content = template.render(kwargs)
        except Exception:
            # Rendering is best effort. Fall back to the unrendered template so
            # `content` is always bound; the substitution below still fills in
            # the variable values.
            content = rand_cnt

        for n, v in kwargs.items():
            if not v:
                v = ""
            # Use a callable replacement so backslashes in the value are
            # inserted literally rather than parsed as regex escapes.
            content = re.sub(n, lambda _match, _v=v: _v, content)

        self.set_output("content", content)

    def thoughts(self) -> str:
        """This component reports no intermediate thoughts."""
        return ""
|
||||
100
agent/component/string_transform.py
Normal file
100
agent/component/string_transform.py
Normal file
@@ -0,0 +1,100 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import os
|
||||
import re
|
||||
from abc import ABC
|
||||
from jinja2 import Template as Jinja2Template
|
||||
from agent.component.base import ComponentParamBase
|
||||
from api.utils.api_utils import timeout
|
||||
from .message import Message
|
||||
|
||||
|
||||
class StringTransformParam(ComponentParamBase):
    """
    Parameters of the StringTransform component.
    """

    def __init__(self):
        super().__init__()
        # Operation to perform: "split" or "merge".
        self.method = "split"
        # Template used by "merge".
        self.script = ""
        # Variable reference read by "split" when no input line is supplied.
        self.split_ref = ""
        self.delimiters = [","]
        self.outputs = {
            "result": {
                "value": "",
                "type": "string",
            },
        }

    def check(self):
        """Validate that the method is supported and delimiters are not empty."""
        supported = ["split", "merge"]
        self.check_valid_value(self.method, "Support method", supported)
        self.check_empty(self.delimiters, "delimiters")
|
||||
|
||||
|
||||
class StringTransform(Message, ABC):
    """Component that either splits a string on delimiters or merges
    variables into a template, depending on ``_param.method``."""
    component_name = "StringTransform"

    def get_input_form(self) -> dict[str, dict]:
        """Describe the inputs: one line for "split", otherwise one line per
        variable referenced in the merge script."""
        if self._param.method == "split":
            return {
                "line": {
                    "name": "String",
                    "type": "line"
                }
            }
        return {k: {
            "name": o["name"],
            "type": "line"
        } for k, o in self.get_input_elements_from_text(self._param.script).items()}

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
    def _invoke(self, **kwargs):
        """Dispatch to ``_split`` or ``_merge`` according to the method."""
        if self._param.method == "split":
            self._split(kwargs.get("line"))
        else:
            self._merge(kwargs)

    def _split(self, line:str|None = None):
        """Split a string on any of the configured delimiters.

        Args:
            line: Text to split; when empty or omitted, the value of the
                ``split_ref`` variable is used instead.

        The list of pieces (delimiters excluded) is stored under ``"result"``.
        """
        var = self._canvas.get_variable_value(self._param.split_ref) if not line else line
        if not var:
            var = ""
        assert isinstance(var, str), "The input variable is not a string: {}".format(type(var))
        self.set_input_value(self._param.split_ref, var)
        # No capturing group, so re.split returns only the pieces and the
        # delimiters never have to be filtered out afterwards.
        splitter = "|".join(re.escape(d) for d in self._param.delimiters)
        self.set_output("result", re.split(splitter, var, flags=re.DOTALL))

    def _merge(self, kwargs:dict[str, str] | None = None):
        """Fill the configured script with variable values.

        Args:
            kwargs: Values already known for referenced variables; the rest
                are resolved by ``get_kwargs``. List values are joined with
                the first configured delimiter.

        The resulting text is stored under ``"result"``.
        """
        # A fresh dict per call: get_kwargs writes into the dict it is given,
        # so a shared default would carry values over between calls.
        if kwargs is None:
            kwargs = {}
        script = self._param.script
        script, kwargs = self.get_kwargs(script, kwargs, self._param.delimiters[0])

        if self._is_jinjia2(script):
            template = Jinja2Template(script)
            try:
                script = template.render(kwargs)
            except Exception:
                # Best effort: keep the unrendered script and rely on the
                # plain substitution below.
                pass

        for k, v in kwargs.items():
            if not v:
                v = ""
            # Callable replacement: the value is inserted literally, without
            # backslash-escape processing.
            script = re.sub(k, lambda match: v, script)

        self.set_output("result", script)

    def thoughts(self) -> str:
        """Return a short progress note naming the current method."""
        return f"It's {self._param.method}ing."
|
||||
|
||||
|
||||
131
agent/component/switch.py
Normal file
131
agent/component/switch.py
Normal file
@@ -0,0 +1,131 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import numbers
|
||||
import os
|
||||
from abc import ABC
|
||||
from typing import Any
|
||||
|
||||
from agent.component.base import ComponentBase, ComponentParamBase
|
||||
from api.utils.api_utils import timeout
|
||||
|
||||
|
||||
class SwitchParam(ComponentParamBase):
    """
    Parameters of the Switch component.
    """

    def __init__(self):
        super().__init__()
        # Each entry of `conditions` is shaped like:
        # {
        #     "logical_operator" : "and | or"
        #     "items" : [
        #         {"cpn_id": "categorize:0", "operator": "contains", "value": ""},
        #         {"cpn_id": "categorize:0", "operator": "contains", "value": ""},...],
        #     "to": ""
        # }
        self.conditions = []
        # Destination used when no condition matches.
        self.end_cpn_ids = []
        self.operators = [
            'contains', 'not contains', 'start with', 'end with',
            'empty', 'not empty',
            '=', '≠', '>', '<', '≥', '≤',
        ]

    def check(self):
        """Validate that conditions exist, each has a target, and a fallback is set."""
        self.check_empty(self.conditions, "[Switch] conditions")
        if any(not cond["to"] for cond in self.conditions):
            raise ValueError("[Switch] 'To' can not be empty!")
        self.check_empty(self.end_cpn_ids, "[Switch] the ELSE/Other destination can not be empty.")

    def get_input_form(self) -> dict[str, dict]:
        """Return the input form description: a single line field named "URLs"."""
        urls_field = {"name": "URLs", "type": "line"}
        return {"urls": urls_field}
|
||||
|
||||
class Switch(ComponentBase, ABC):
    """Component that routes to the first condition whose items match,
    or to the configured fallback when none does."""
    component_name = "Switch"

    # Comparison used by each ordering operator.
    _ORDERINGS = {
        ">": lambda a, b: a > b,
        "<": lambda a, b: a < b,
        "≥": lambda a, b: a >= b,
        "≤": lambda a, b: a <= b,
    }

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 3)))
    def _invoke(self, **kwargs):
        """Evaluate the conditions in order and publish the chosen route.

        For a condition whose ``logical_operator`` is not ``"and"`` the first
        matching item selects it; otherwise all evaluated items must match.
        The chosen target ids are stored under ``"_next"`` and their component
        names under ``"next"``. Items with an empty ``cpn_id`` are skipped.
        """
        for cond in self._param.conditions:
            res = []
            for item in cond["items"]:
                if not item["cpn_id"]:
                    continue
                cpn_v = self._canvas.get_variable_value(item["cpn_id"])
                self.set_input_value(item["cpn_id"], cpn_v)
                operatee = item.get("value", "")
                if isinstance(cpn_v, numbers.Number):
                    # Compare numbers with numbers where possible. The operand
                    # may legitimately be "" (e.g. the 'empty' operators) or
                    # non-numeric text, so leave it as-is when it won't convert.
                    try:
                        operatee = float(operatee)
                    except (TypeError, ValueError):
                        pass
                res.append(self.process_operator(cpn_v, item["operator"], operatee))
                if cond["logical_operator"] != "and" and any(res):
                    self._set_next(cond["to"])
                    return

            # NOTE(review): all([]) is True, so a condition whose items were
            # all skipped is selected here — confirm this is intended.
            if all(res):
                self._set_next(cond["to"])
                return

        self._set_next(self._param.end_cpn_ids)

    def _set_next(self, cpn_ids):
        """Publish ``cpn_ids`` as the route: names under "next", ids under "_next"."""
        self.set_output("next", [self._canvas.get_component_name(cpn_id) for cpn_id in cpn_ids])
        self.set_output("_next", cpn_ids)

    @staticmethod
    def _as_text(value: Any) -> str:
        """Lower-cased text form used by the text operators; None becomes ""."""
        return "" if value is None else str(value).lower()

    def process_operator(self, input: Any, operator: str, value: Any) -> bool:
        """Apply one comparison operator.

        Args:
            input: The variable value being tested.
            operator: One of the operators listed in ``SwitchParam.operators``.
            value: The operand to compare against.

        Returns:
            The boolean outcome of the comparison.

        Raises:
            ValueError: If ``operator`` is not supported.
        """
        if operator in ("contains", "not contains", "start with", "end with"):
            # Text operators are case-insensitive; coercing to text keeps
            # unset (None) or non-string values from raising AttributeError.
            text, needle = self._as_text(input), self._as_text(value)
            if operator == "contains":
                return needle in text
            if operator == "not contains":
                return needle not in text
            if operator == "start with":
                return text.startswith(needle)
            return text.endswith(needle)

        if operator == "empty":
            return not input
        if operator == "not empty":
            return bool(input)
        if operator == "=":
            return bool(input == value)
        if operator == "≠":
            return bool(input != value)

        compare = self._ORDERINGS.get(operator)
        if compare is not None:
            # Prefer a numeric comparison; fall back to comparing the raw
            # values when either side is not convertible to float.
            try:
                return bool(compare(float(input), float(value)))
            except Exception:
                return bool(compare(input, value))

        raise ValueError(f"Not supported operator: {operator}")

    def thoughts(self) -> str:
        """Return a short progress note."""
        return "I’m weighing a few options and will pick the next step shortly."
|
||||
Reference in New Issue
Block a user