diff --git a/agenthub/coact_agent/__init__.py b/agenthub/coact_agent/__init__.py index 4f25bf07832..be2273f2d69 100644 --- a/agenthub/coact_agent/__init__.py +++ b/agenthub/coact_agent/__init__.py @@ -1,6 +1,10 @@ +from agenthub.coact_agent.executor.executor_agent import ( + LocalExecutorAgent as CoActExecutorAgent, +) from agenthub.coact_agent.planner.planner_agent import ( GlobalPlannerAgent as CoActPlannerAgent, ) from openhands.controller.agent import Agent Agent.register('CoActPlannerAgent', CoActPlannerAgent) +Agent.register('CoActExecutorAgent', CoActExecutorAgent) diff --git a/agenthub/coact_agent/executor/action_parser.py b/agenthub/coact_agent/executor/action_parser.py new file mode 100644 index 00000000000..752f940e841 --- /dev/null +++ b/agenthub/coact_agent/executor/action_parser.py @@ -0,0 +1,39 @@ +from agenthub.codeact_agent.action_parser import ( + CodeActActionParserAgentDelegate, + CodeActActionParserCmdRun, + CodeActActionParserFinish, + CodeActActionParserIPythonRunCell, + CodeActActionParserMessage, + CodeActResponseParser, +) + + +class ExecutorResponseParser(CodeActResponseParser): + """Parser action: + - CmdRunAction(command) - bash command to run + - IPythonRunCellAction(code) - IPython code to run + - AgentDelegateAction(agent, inputs) - delegate action for (sub)task + - MessageAction(content) - Message action to run (e.g. 
ask for clarification) + - AgentFinishAction() - end the interaction + """ + + def __init__(self): + # NOTE: pay attention to the item order in self.action_parsers + super().__init__() + self.action_parsers = [ + CodeActActionParserFinish(), + CodeActActionParserCmdRun(), + CodeActActionParserIPythonRunCell(), + CodeActActionParserAgentDelegate(), + # TODO: additional parsers + ] + self.default_parser = CodeActActionParserMessage() + + def parse_response(self, response) -> str: + action = response.choices[0].message.content + if action is None: + return '' + for lang in ['bash', 'ipython', 'browse']: + if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action: + action += f'</execute_{lang}>' + return action diff --git a/agenthub/coact_agent/executor/executor_agent.py b/agenthub/coact_agent/executor/executor_agent.py new file mode 100644 index 00000000000..ffc2cf86b39 --- /dev/null +++ b/agenthub/coact_agent/executor/executor_agent.py @@ -0,0 +1,12 @@ +from agenthub.coact_agent.executor.action_parser import ExecutorResponseParser +from agenthub.codeact_agent.codeact_agent import CodeActAgent +from openhands.core.config import AgentConfig +from openhands.llm.llm import LLM + + +class LocalExecutorAgent(CodeActAgent): + VERSION = '1.0' + + def __init__(self, llm: LLM, config: AgentConfig) -> None: + super().__init__(llm, config) + self.action_parser = ExecutorResponseParser() diff --git a/agenthub/coact_agent/executor/system_prompt.j2 b/agenthub/coact_agent/executor/system_prompt.j2 new file mode 100644 index 00000000000..4454df1ca32 --- /dev/null +++ b/agenthub/coact_agent/executor/system_prompt.j2 @@ -0,0 +1,47 @@ +{% set MINIMAL_SYSTEM_PREFIX %} +A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions. +The assistant can use a Python environment with <execute_ipython>, e.g.: +<execute_ipython> +print("Hello World!") +</execute_ipython> +The assistant can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>. +The assistant is not allowed to run interactive commands. 
For commands that may run indefinitely, +the output should be redirected to a file and the command run in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash> +If a command execution result says "Command timed out. Sending SIGINT to the process", +the assistant should retry running the command in the background. +{% endset %} +{% set BROWSING_PREFIX %} +The assistant can browse the Internet with <execute_browse> and </execute_browse>. +For example, <execute_browse> Tell me the usa's president using google search </execute_browse>. +Or <execute_browse> Tell me what is in http://example.com </execute_browse>. +{% endset %} +{% set PIP_INSTALL_PREFIX %} +The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them. +{% endset %} +{% set SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + BROWSING_PREFIX + PIP_INSTALL_PREFIX %} +{% set COMMAND_DOCS %} +Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment: +{{ agent_skills_docs }} +IMPORTANT: +- `open_file` only returns the first 100 lines of the file by default! The assistant MUST use `scroll_down` repeatedly to read the full file BEFORE making edits! +- The assistant shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code! +- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +- Any code issued should be less than 50 lines to avoid context being cut off! +- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content! +- For `edit_file_by_replace` NEVER provide empty parameters! +- For `edit_file_by_replace` the file must be read fully before any replacements! 
+{% endset %} +{% set SYSTEM_SUFFIX %} +Responses should be concise. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. +If the assistant is finished with the task you MUST include <finish></finish> in your response. +IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible. +The assistant should utilize full file paths and the `pwd` command to prevent path-related errors. +The assistant must avoid apologies and thanks in its responses. + +{% endset %} +{# Combine all parts without newlines between them #} +{{ SYSTEM_PREFIX -}} +{{- COMMAND_DOCS -}} +{{- SYSTEM_SUFFIX }} diff --git a/agenthub/coact_agent/executor/user_prompt.j2 b/agenthub/coact_agent/executor/user_prompt.j2 new file mode 100644 index 00000000000..bae6bfc18cf --- /dev/null +++ b/agenthub/coact_agent/executor/user_prompt.j2 @@ -0,0 +1,231 @@ +{% set DEFAULT_EXAMPLE %} +--- START OF EXAMPLE --- + +USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000. + +ASSISTANT: +Sure! Let me create a Python file `app.py`: +<execute_ipython> +create_file('app.py') +</execute_ipython> + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +(this is the beginning of the file) +1| +(this is the end of the file) +[File app.py created.] 
+ +ASSISTANT: +Now I will write the Python code for starting a web server and save it to the file `app.py`: +<execute_ipython> +EDITED_CODE="""from flask import Flask +app = Flask(__name__) + +@app.route('/') +def index(): + numbers = list(range(1, 11)) + return str(numbers) + +if __name__ == '__main__': + app.run(port=5000)""" + +insert_content_at_line( + 'app.py', + 1, + EDITED_CODE, +) +</execute_ipython> + +USER: +OBSERVATION: +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +(this is the end of the file) +[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + +ASSISTANT: +I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: +<execute_bash> +python3 app.py > server.log 2>&1 & +</execute_bash> + +USER: +OBSERVATION: +[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 + +ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: +<execute_bash> +cat server.log +</execute_bash> + +USER: +OBSERVATION: +Traceback (most recent call last): + File "/workspace/app.py", line 2, in <module> + from flask import Flask +ModuleNotFoundError: No module named 'flask' + +ASSISTANT: +It seems that Flask is not installed. 
Let me install Flask for you: +<execute_bash> +pip install flask +</execute_bash> + +USER: +OBSERVATION: +Defaulting to user installation because normal site-packages is not writeable +Collecting flask + Using cached flask-3.0.3-py3-none-any.whl (101 kB) +Collecting blinker>=1.6.2 + Using cached blinker-1.7.0-py3-none-any.whl (13 kB) +Collecting Werkzeug>=3.0.0 + Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB) +Collecting click>=8.1.3 + Using cached click-8.1.7-py3-none-any.whl (97 kB) +Collecting itsdangerous>=2.1.2 + Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB) +Requirement already satisfied: Jinja2>=3.1.2 in /home/openhands/.local/lib/python3.10/site-packages (from flask) (3.1.3) +Requirement already satisfied: MarkupSafe>=2.0 in /home/openhands/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5) +Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask +Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0 + +ASSISTANT: +Now that Flask is installed, let me run the Python file again: +<execute_bash> +python3 app.py > server.log 2>&1 & +</execute_bash> + +USER: +OBSERVATION: +[1] 124 + +ASSISTANT: +Let me check the server log again: +<execute_bash> +cat server.log +</execute_bash> + +USER: +OBSERVATION: +* Serving Flask app 'app' + * Debug mode: off +WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. + * Running on http://127.0.0.1:5000 +Press CTRL+C to quit + +ASSISTANT: +The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! + +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure! 
Let me browse the server's homepage at http://127.0.0.1:5000: +<execute_browse> +Get the content on "http://127.0.0.1:5000" +</execute_browse> + +USER: +OBSERVATION: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + +USER: Now kill the server, make it display the numbers in a table format. + +ASSISTANT: +Sure! Let me stop the server first: +<execute_bash> +kill 124 +</execute_bash> + +USER: +OBSERVATION: +[1]+ Terminated python3 app.py > server.log 2>&1 + +ASSISTANT: +The server has been stopped. Let me open the Python file and modify it. +<execute_ipython> +open_file('app.py') +</execute_ipython> + +USER: +[File: /workspace/app.py (10 lines total)] +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +(this is the end of the file) + +ASSISTANT: +I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: +<execute_ipython> +edit_file_by_replace( + 'app.py', + to_replace=" return str(numbers)", + new_content=" return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'", +) +</execute_ipython> + +USER: +Observation: +[File: /workspace/app.py (10 lines total after edit)] +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +(this is the end of the file) +[File updated (edited at line 7). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] 
+ +ASSISTANT: +Running the updated file: +<execute_bash> +python3 app.py > server.log 2>&1 & +</execute_bash> + +USER: +Observation: +[1] 126 + +ASSISTANT: +The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests! +<finish></finish> + +--- END OF EXAMPLE --- +{% endset %} +Here is an example of how you can interact with the environment for task solving: +{{ DEFAULT_EXAMPLE }} +{% if micro_agent %} +--- BEGIN OF GUIDELINE --- +The following information may assist you in completing your task: + +{{ micro_agent }} +--- END OF GUIDELINE --- +{% endif %} + +NOW, LET'S START! diff --git a/agenthub/coact_agent/planner/action_parser.py b/agenthub/coact_agent/planner/action_parser.py index 5aec357c687..55352766179 100644 --- a/agenthub/coact_agent/planner/action_parser.py +++ b/agenthub/coact_agent/planner/action_parser.py @@ -1,17 +1,21 @@ import re -from openhands.controller.action_parser import ActionParser, ResponseParser +from agenthub.codeact_agent.action_parser import ( + CodeActActionParserAgentDelegate, + CodeActActionParserCmdRun, + CodeActActionParserFinish, + CodeActActionParserIPythonRunCell, + CodeActActionParserMessage, + CodeActResponseParser, +) +from openhands.controller.action_parser import ActionParser from openhands.events.action import ( Action, AgentDelegateAction, - AgentFinishAction, - CmdRunAction, - IPythonRunCellAction, - MessageAction, ) -class PlannerResponseParser(ResponseParser): +class PlannerResponseParser(CodeActResponseParser): """Parser action: - CmdRunAction(command) - bash command to run - IPythonRunCellAction(code) - IPython code to run @@ -28,14 +32,10 @@ def __init__(self): 
CodeActActionParserCmdRun(), CodeActActionParserIPythonRunCell(), CodeActActionParserAgentDelegate(), - CodeActActionParserGlobalPlan(), + CoActActionParserGlobalPlan(), ] self.default_parser = CodeActActionParserMessage() - def parse(self, response) -> Action: - action_str = self.parse_response(response) - return self.parse_action(action_str) - def parse_response(self, response) -> str: action = response.choices[0].message.content if action is None: @@ -57,14 +57,8 @@ def parse_response(self, response) -> str: action += f'' return action - def parse_action(self, action_str: str) -> Action: - for action_parser in self.action_parsers: - if action_parser.check_condition(action_str): - return action_parser.parse(action_str) - return self.default_parser.parse(action_str) - -class CodeActActionParserGlobalPlan(ActionParser): +class CoActActionParserGlobalPlan(ActionParser): """Parser action: - AgentDelegateAction(agent, inputs) - delegate action for (sub)task """ @@ -87,135 +81,7 @@ def parse(self, action_str: str) -> Action: # thought = action_str.replace(self.global_plan.group(0), '').strip() global_plan_actions = self.global_plan.group(1).strip() return AgentDelegateAction( - agent='CodeActAgent', + agent='CoActExecutorAgent', inputs={'task': global_plan_actions}, action_suffix='global_plan', ) # FIXME: check to use a more proper key e.g. 
'global_plan' instead of 'task' - - -class CodeActActionParserFinish(ActionParser): - """Parser action: - - AgentFinishAction() - end the interaction - """ - - def __init__( - self, - ): - self.finish_command = None - - def check_condition(self, action_str: str) -> bool: - self.finish_command = re.search(r'.*', action_str, re.DOTALL) - return self.finish_command is not None - - def parse(self, action_str: str) -> Action: - assert ( - self.finish_command is not None - ), 'self.finish_command should not be None when parse is called' - thought = action_str.replace(self.finish_command.group(0), '').strip() - return AgentFinishAction(thought=thought) - - -class CodeActActionParserCmdRun(ActionParser): - """Parser action: - - CmdRunAction(command) - bash command to run - - AgentFinishAction() - end the interaction - """ - - def __init__( - self, - ): - self.bash_command = None - - def check_condition(self, action_str: str) -> bool: - self.bash_command = re.search( - r'(.*?)', action_str, re.DOTALL - ) - return self.bash_command is not None - - def parse(self, action_str: str) -> Action: - assert ( - self.bash_command is not None - ), 'self.bash_command should not be None when parse is called' - thought = action_str.replace(self.bash_command.group(0), '').strip() - # a command was found - command_group = self.bash_command.group(1).strip() - if command_group.strip() == 'exit': - return AgentFinishAction(thought=thought) - return CmdRunAction(command=command_group, thought=thought) - - -class CodeActActionParserIPythonRunCell(ActionParser): - """Parser action: - - IPythonRunCellAction(code) - IPython code to run - """ - - def __init__( - self, - ): - self.python_code = None - self.jupyter_kernel_init_code: str = 'from agentskills import *' - - def check_condition(self, action_str: str) -> bool: - self.python_code = re.search( - r'(.*?)', action_str, re.DOTALL - ) - return self.python_code is not None - - def parse(self, action_str: str) -> Action: - assert ( - 
self.python_code is not None - ), 'self.python_code should not be None when parse is called' - code_group = self.python_code.group(1).strip() - thought = action_str.replace(self.python_code.group(0), '').strip() - return IPythonRunCellAction( - code=code_group, - thought=thought, - kernel_init_code=self.jupyter_kernel_init_code, - ) - - -class CodeActActionParserAgentDelegate(ActionParser): - """Parser action: - - AgentDelegateAction(agent, inputs) - delegate action for (sub)task - """ - - def __init__( - self, - ): - self.agent_delegate = None - - def check_condition(self, action_str: str) -> bool: - self.agent_delegate = re.search( - r'(.*)', action_str, re.DOTALL - ) - return self.agent_delegate is not None - - def parse(self, action_str: str) -> Action: - assert ( - self.agent_delegate is not None - ), 'self.agent_delegate should not be None when parse is called' - thought = action_str.replace(self.agent_delegate.group(0), '').strip() - browse_actions = self.agent_delegate.group(1).strip() - task = f'{thought}. I should start with: {browse_actions}' - return AgentDelegateAction( - agent='BrowsingAgent', inputs={'task': task}, action_suffix='browse' - ) - - -class CodeActActionParserMessage(ActionParser): - """Parser action: - - MessageAction(content) - Message action to run (e.g. 
ask for clarification) - """ - - def __init__( - self, - ): - pass - - def check_condition(self, action_str: str) -> bool: - # We assume the LLM is GOOD enough that when it returns pure natural language - # it wants to talk to the user - return True - - def parse(self, action_str: str) -> Action: - return MessageAction(content=action_str, wait_for_response=True) diff --git a/frontend/src/components/Workspace.tsx b/frontend/src/components/Workspace.tsx index 90034326d0e..3d9e1f53080 100644 --- a/frontend/src/components/Workspace.tsx +++ b/frontend/src/components/Workspace.tsx @@ -26,6 +26,7 @@ function Workspace() { CodeActAgent: [TabOption.JUPYTER], PlannerAgent: [TabOption.PLANNER], CoActPlannerAgent: [TabOption.JUPYTER], + CoActExecutorAgent: [TabOption.JUPYTER], }; const extraTabs = extraTabsMap[AGENT] || []; const showTabs = [...baseTabs, ...extraTabs];