diff --git a/agenthub/__init__.py b/agenthub/__init__.py index d91f2d3ec23..7016445f90e 100644 --- a/agenthub/__init__.py +++ b/agenthub/__init__.py @@ -9,6 +9,7 @@ from agenthub import ( # noqa: E402 browsing_agent, + coact_agent, codeact_agent, codeact_swe_agent, delegator_agent, @@ -23,6 +24,7 @@ 'delegator_agent', 'dummy_agent', 'browsing_agent', + 'coact_agent', ] for agent in all_microagents.values(): diff --git a/agenthub/coact_agent/README.md b/agenthub/coact_agent/README.md new file mode 100644 index 00000000000..a7f981e0e07 --- /dev/null +++ b/agenthub/coact_agent/README.md @@ -0,0 +1,12 @@ +# CoAct Multi-Agent Framework + +This folder implements a multi-agent workflow inspired by the CoAct framework ([paper](https://arxiv.org/abs/2406.13381)), which provides a robust structure for defining, planning, and executing tasks using multiple agents. + +## Agents + +1. `CoActPlannerAgent`: + - is responsible for exploring and creating a global plan. It can replan if there are issues with the previous one. + - has the full capabilities of [CodeActAgent](https://github.com/All-Hands-AI/OpenHands/tree/main/agenthub/codeact_agent). +2. `CoActExecutorAgent`: + - is responsible for executing the proposed plan. When it runs into issues with the plan, it can request a new one. + - also has the full capabilities of [CodeActAgent](https://github.com/All-Hands-AI/OpenHands/tree/main/agenthub/codeact_agent). 
diff --git a/agenthub/coact_agent/__init__.py b/agenthub/coact_agent/__init__.py new file mode 100644 index 00000000000..be2273f2d69 --- /dev/null +++ b/agenthub/coact_agent/__init__.py @@ -0,0 +1,10 @@ +from agenthub.coact_agent.executor.executor_agent import ( + LocalExecutorAgent as CoActExecutorAgent, +) +from agenthub.coact_agent.planner.planner_agent import ( + GlobalPlannerAgent as CoActPlannerAgent, +) +from openhands.controller.agent import Agent + +Agent.register('CoActPlannerAgent', CoActPlannerAgent) +Agent.register('CoActExecutorAgent', CoActExecutorAgent) diff --git a/agenthub/coact_agent/executor/action_parser.py b/agenthub/coact_agent/executor/action_parser.py new file mode 100644 index 00000000000..b067d09ea35 --- /dev/null +++ b/agenthub/coact_agent/executor/action_parser.py @@ -0,0 +1,71 @@ +import re + +from agenthub.codeact_agent.action_parser import ( + CodeActActionParserAgentDelegate, + CodeActActionParserCmdRun, + CodeActActionParserFinish, + CodeActActionParserIPythonRunCell, + CodeActActionParserMessage, + CodeActResponseParser, +) +from openhands.controller.action_parser import ActionParser +from openhands.events.action import ( + Action, + AgentFinishAction, +) + + +class ExecutorResponseParser(CodeActResponseParser): + """Parser action: + - CmdRunAction(command) - bash command to run + - IPythonRunCellAction(code) - IPython code to run + - AgentDelegateAction(agent, inputs) - delegate action for (sub)task + - MessageAction(content) - Message action to run (e.g. 
ask for clarification) + - AgentFinishAction() - end the interaction + """ + + def __init__(self): + # Pay attention to the item order in self.action_parsers + super().__init__() + self.action_parsers = [ + CodeActActionParserFinish(), + CodeActActionParserCmdRun(), + CodeActActionParserIPythonRunCell(), + CodeActActionParserAgentDelegate(), + CoActActionParserRequest(), + ] + self.default_parser = CodeActActionParserMessage() + + def parse_response(self, response) -> str: + action = response.choices[0].message.content + if action is None: + return '' + for action_suffix in ['bash', 'ipython', 'browse', 'request']: + if ( + f'<execute_{action_suffix}>' in action + and f'</execute_{action_suffix}>' not in action + ): + action += f'</execute_{action_suffix}>' + return action + + +class CoActActionParserRequest(ActionParser): + def __init__(self): + self.request = None + + def check_condition(self, action_str: str) -> bool: + self.request = re.search( + r'<execute_request>(.*)</execute_request>', action_str, re.DOTALL + ) + return self.request is not None + + def parse(self, action_str: str) -> Action: + assert ( + self.request is not None + ), 'self.request should not be None when parse is called' + + replan_request = self.request.group(1).strip() + return AgentFinishAction( + thought=replan_request, + outputs={'content': replan_request}, + ) diff --git a/agenthub/coact_agent/executor/executor_agent.py b/agenthub/coact_agent/executor/executor_agent.py new file mode 100644 index 00000000000..fbbb9423838 --- /dev/null +++ b/agenthub/coact_agent/executor/executor_agent.py @@ -0,0 +1,22 @@ +import os + +from agenthub.coact_agent.executor.action_parser import ExecutorResponseParser +from agenthub.codeact_agent.codeact_agent import CodeActAgent +from openhands.core.config import AgentConfig +from openhands.llm.llm import LLM +from openhands.runtime.plugins.agent_skills import AgentSkillsRequirement +from openhands.utils.prompt import PromptManager + + +class LocalExecutorAgent(CodeActAgent): + VERSION = '1.0' + + def __init__(self, llm: LLM, config: AgentConfig) -> None: + 
super().__init__(llm, config) + + self.action_parser = ExecutorResponseParser() + self.prompt_manager = PromptManager( + prompt_dir=os.path.join(os.path.dirname(__file__)), + agent_skills_docs=AgentSkillsRequirement.documentation, + micro_agent=self.micro_agent, + ) diff --git a/agenthub/coact_agent/executor/system_prompt.j2 b/agenthub/coact_agent/executor/system_prompt.j2 new file mode 100644 index 00000000000..b9fd5c819ec --- /dev/null +++ b/agenthub/coact_agent/executor/system_prompt.j2 @@ -0,0 +1,59 @@ +{% set MINIMAL_SYSTEM_PREFIX %} +You are an autonomous intelligent programming agent playing the role of a subordinate employee responsible for local planning and execution of specific tasks in a multi-tier task execution structure, tasked with software development. You will be given coding-based tasks. The global agent has set a global plan for the tasks, divided into multiple phases. These phase plans will be given to you as a whole. Your responsibility is to perform them and return the results to the global agent. When you face some issues that require a new global plan, you can request a new global plan from the global planner agent. + +Here's the information you'll have: +* The broken-down phase list: These are the tasks you're trying to complete now. +* The current codebase: This is what you need to navigate through and make the changes to complete the tasks given by the global agent. + +The agent can use a Python environment with , e.g.: + +print("Hello World!") + + +The agent can execute bash commands wrapped with , e.g. ls . +The agent is not allowed to run interactive commands. For commands that may run indefinitely, +the output should be redirected to a file and the command run in the background, e.g. python3 app.py > server.log 2>&1 & + +If a command execution result says "Command timed out. Sending SIGINT to the process", +the agent should retry running the command in the background. 
+ +As a local executor agent, there are some additional actions that you can use to communicate back to the global planner agent: +- ``: You have encountered an exception in the execution process. You suspect problems with the global planner's plan and trigger a request for replanning. Explain why you decide to request a new global plan using this action. + +{% endset %} +{% set BROWSING_PREFIX %} +The assistant can browse the Internet with and . +For example, Tell me the usa's president using google search . +Or Tell me what is in http://example.com . +{% endset %} +{% set PIP_INSTALL_PREFIX %} +The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. +{% endset %} +{% set SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + BROWSING_PREFIX + PIP_INSTALL_PREFIX %} +{% set COMMAND_DOCS %} +Apart from the standard Python library, the assistant can also use the following functions (already imported) in environment: +{{ agent_skills_docs }} +IMPORTANT: +- `open_file` only returns the first 100 lines of the file by default! The assistant MUST use `scroll_down` repeatedly to read the full file BEFORE making edits! +- The assistant shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code! +- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +- Any code issued should be less than 50 lines to avoid context being cut off! +- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content! +- For `edit_file_by_replace` NEVER provide empty parameters! 
+- For `edit_file_by_replace` the file must be read fully before any replacements! +{% endset %} +{% set SYSTEM_SUFFIX %} +Responses should be concise. +The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed. +If the assistant is finished with the task you MUST include <finish></finish> in your response. +IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible. +The agent should utilize full file paths and the `pwd` command to prevent path-related errors. +The agent must avoid apologies and thanks in its responses. +Remember to execute ALL the phases of the global plan and only return the summary of the whole process to the global agent. + +{% endset %} +{# Combine all parts without newlines between them #} +{{ SYSTEM_PREFIX -}} +{{- COMMAND_DOCS -}} +{{- SYSTEM_SUFFIX }} diff --git a/agenthub/coact_agent/executor/user_prompt.j2 b/agenthub/coact_agent/executor/user_prompt.j2 new file mode 100644 index 00000000000..bae6bfc18cf --- /dev/null +++ b/agenthub/coact_agent/executor/user_prompt.j2 @@ -0,0 +1,231 @@ +{% set DEFAULT_EXAMPLE %} +--- START OF EXAMPLE --- + +USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000. + +ASSISTANT: +Sure! Let me create a Python file `app.py`: +<execute_ipython> +create_file('app.py') +</execute_ipython> + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +(this is the beginning of the file) +1| +(this is the end of the file) +[File app.py created.] 
+ +ASSISTANT: +Now I will write the Python code for starting a web server and save it to the file `app.py`: + +EDITED_CODE="""from flask import Flask +app = Flask(__name__) + +@app.route('/') +def index(): + numbers = list(range(1, 11)) + return str(numbers) + +if __name__ == '__main__': + app.run(port=5000)""" + +insert_content_at_line( + 'app.py', + 1, + EDITED_CODE, +) + + +USER: +OBSERVATION: +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +(this is the end of the file) +[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + +ASSISTANT: +I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: + +python3 app.py > server.log 2>&1 & + + +USER: +OBSERVATION: +[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 + +ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: + +cat server.log + + +USER: +OBSERVATION: +Traceback (most recent call last): + File "/workspace/app.py", line 2, in + from flask import Flask +ModuleNotFoundError: No module named 'flask' + +ASSISTANT: +It seems that Flask is not installed. 
Let me install Flask for you: + +pip install flask + + +USER: +OBSERVATION: +Defaulting to user installation because normal site-packages is not writeable +Collecting flask + Using cached flask-3.0.3-py3-none-any.whl (101 kB) +Collecting blinker>=1.6.2 + Using cached blinker-1.7.0-py3-none-any.whl (13 kB) +Collecting Werkzeug>=3.0.0 + Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB) +Collecting click>=8.1.3 + Using cached click-8.1.7-py3-none-any.whl (97 kB) +Collecting itsdangerous>=2.1.2 + Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB) +Requirement already satisfied: Jinja2>=3.1.2 in /home/openhands/.local/lib/python3.10/site-packages (from flask) (3.1.3) +Requirement already satisfied: MarkupSafe>=2.0 in /home/openhands/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5) +Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask +Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0 + +ASSISTANT: +Now that Flask is installed, let me run the Python file again: + +python3 app.py > server.log 2>&1 & + + +USER: +OBSERVATION: +[1] 124 + +ASSISTANT: +Let me check the server log again: + +cat server.log + + +USER: +OBSERVATION: +* Serving Flask app 'app' + * Debug mode: off +WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. + * Running on http://127.0.0.1:5000 +Press CTRL+C to quit + +ASSISTANT: +The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! + +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure! 
Let me browse the server's homepage at http://127.0.0.1:5000: + +Get the content on "http://127.0.0.1:5000" + + +USER: +OBSERVATION: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + +USER: Now kill the server, make it display the numbers in a table format. + +ASSISTANT: +Sure! Let me stop the server first: + +kill 124 + + +USER: +OBSERVATION: +[1]+ Terminated python3 app.py > server.log 2>&1 + +ASSISTANT: +The server has been stopped. Let me open the Python file and modify it. + +open_file('app.py') + + +USER: +[File: /workspace/app.py (10 lines total)] +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +(this is the end of the file) + +ASSISTANT: +I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: + +edit_file_by_replace( + 'app.py', + to_replace=" return str(numbers)", + new_content=" return '' + ''.join([f'' for i in numbers]) + '
{i}
'", +) +
+ +USER: +Observation: +[File: /workspace/app.py (10 lines total after edit)] +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '' + ''.join([f'' for i in numbers]) + '
{i}
' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +(this is the end of the file) +[File updated (edited at line 7). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + +ASSISTANT: +Running the updated file: + +python3 app.py > server.log 2>&1 & + + +USER: +Observation: +[1] 126 + +ASSISTANT: +The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests! + + +--- END OF EXAMPLE --- +{% endset %} +Here is an example of how you can interact with the environment for task solving: +{{ DEFAULT_EXAMPLE }} +{% if micro_agent %} +--- BEGIN OF GUIDELINE --- +The following information may assist you in completing your task: + +{{ micro_agent }} +--- END OF GUIDELINE --- +{% endif %} + +NOW, LET'S START! diff --git a/agenthub/coact_agent/meta/executor_prompt.py b/agenthub/coact_agent/meta/executor_prompt.py new file mode 100644 index 00000000000..6529af51c76 --- /dev/null +++ b/agenthub/coact_agent/meta/executor_prompt.py @@ -0,0 +1,37 @@ +EXECUTOR_PASS_CHECK = """Now, your role is to ensure the successful execution of actions in the global plan. Verify the results of these actions and compare them to the global plan. If they align, proceed to the next phase and output the action decision as ```move```. If discrepancies arise, you have two options: +1) If you suspect issues with your local plan, output the action ```revise```, or +2) If you suspect problems with the global planner's plan, trigger a request for replanning by outputting the action ```request```. +If the actions align with the global plan, explain the reasons for this alignment. If discrepancies arise, provide detailed reasons for your action decision: +- If you suspect issues with your local plan, explain why and use the action ```revise```. 
+- If you suspect problems with the global planner's plan, explain why and use the action ```request```. +Follow this structured output template: + +**Output:** +- Output the action decision and reasons for your decision. Use the following format: + ``` + Action: [action] + Reasons: [reasons] + ``` +Next, you will be provided with the expected state of this phase and the execution results of your local plan. +""" # FIXME: delete this as we may not use it + +EXECUTOR_FALSE_CHECK = """You have encountered an exception in the execution process. Your current responsibility is to meticulously inspect the execution results of actions and identify the root causes of these exceptions. You have two options: +1) Suspect issues within your local plan and employ the action ```revise```, or +2) Suspect problems with the global planner's plan and trigger a request for replanning by executing the action ```request```. +Provide detailed reasons for your action decision: +- If you suspect issues with your local plan, explain why you decide to use the action ```revise```. +- If you suspect problems with the global planner's plan, explain why you decide to use the action ```request```. +Follow this structured output template: + +**Output:** +- Output the action decision and reasons for your decision. Use the following format: + ``` + Action: [action] + Reasons: [reasons] + ``` +Next, you will be provided with the expected state of this phase and the execution exceptions during the execution of your local plan. +""" + +EXECUTOR_SELF_REVISE = """Now, you have analyzed the situation and decided adjustments are needed to the local plan. Here is the reasons you proposed: {reasons}. Provide a revised plan using Page Operation Actions, and make sure to follow the format for action generation as mentioned earlier""" + +EXECUTOR_OVERRULED_REVISE = """Facing your request, the global planner believes his previous global plan is correct, and refuse to adjust it and overrule your request. 
Here is the reasons he proposed: {reasons}. Based these information and your past experience, provide a revised plan using Page Operation Actions, and make sure to follow the format for action generation as mentioned earlier.""" diff --git a/agenthub/coact_agent/meta/planner_prompt.py b/agenthub/coact_agent/meta/planner_prompt.py new file mode 100644 index 00000000000..451459c1797 --- /dev/null +++ b/agenthub/coact_agent/meta/planner_prompt.py @@ -0,0 +1,33 @@ +PLANNER_CONSTRUCT_PLAN = """Your role is to construct a multi-stage global plan, providing separate subtask descriptions and expected states for each phase. Ensure that your plan is comprehensive and covers all aspects of the task. Use the following format: + +**Subtask 1:** +- **Subtask**: Describe the first subtask you are about to plan. +- **Expected State**: Define the expected state of the task after completing the first subtask. + +**Subtask 2:** +- **Subtask**: Describe the second subtask you are about to plan. +- **Expected State**: Define the expected state of the task after completing the second subtask. + +... + +**Subtask n:** +- **Subtask**: Describe the nth subtask you are about to plan. +- **Expected State**: Define the expected state of the task after completing the nth subtask. +""" + +PLANNER_DECIDE = """The local agent encountered issues with the global planner's global plan and he believes it's necessary to replan globally. Here is the reasons he proposed: {reasons}. Your current task is to decide whether to agree to the re-planning based on the description in the request for a new global plan submitted by the local agent. If you agree, the next action is to ```revise```; if not, the next action is to ```overrule```. Consider the implications of your decision and provide clear reasoning for it. Make sure to give clear and constructive guidance for plan adjustments. + Follow this structured output template: + +**Output:** +- Output the action decision and reasons for your decision. 
Use the following format: + ``` + Action: [action] + Reasons: [reasons] + ``` +""" + +PLANNER_REVISE = """In response to the local agent's request, you have chosen to revise your previously made global plan. Reconsider the characteristics of the task based on the description in the request and create a new global plan. Ensure that your revised plan is well-detailed and addresses any issues identified. And make sure to follow the format for action generation as mentioned earlier. +""" + +PLANNER_COLLATE = """Your role now involves collating the final result to meet the needs of the task. Ensure that the final output aligns with the global plan and that any necessary adjustments have been made. Provide a comprehensive summary of the task's completion. +""" diff --git a/agenthub/coact_agent/planner/action_parser.py b/agenthub/coact_agent/planner/action_parser.py new file mode 100644 index 00000000000..55352766179 --- /dev/null +++ b/agenthub/coact_agent/planner/action_parser.py @@ -0,0 +1,87 @@ +import re + +from agenthub.codeact_agent.action_parser import ( + CodeActActionParserAgentDelegate, + CodeActActionParserCmdRun, + CodeActActionParserFinish, + CodeActActionParserIPythonRunCell, + CodeActActionParserMessage, + CodeActResponseParser, +) +from openhands.controller.action_parser import ActionParser +from openhands.events.action import ( + Action, + AgentDelegateAction, +) + + +class PlannerResponseParser(CodeActResponseParser): + """Parser action: + - CmdRunAction(command) - bash command to run + - IPythonRunCellAction(code) - IPython code to run + - AgentDelegateAction(agent, inputs) - delegate action for (sub)task + - MessageAction(content) - Message action to run (e.g. 
ask for clarification) + - AgentFinishAction() - end the interaction + """ + + def __init__(self): + # Pay attention to the item order in self.action_parsers + super().__init__() + self.action_parsers = [ + CodeActActionParserFinish(), + CodeActActionParserCmdRun(), + CodeActActionParserIPythonRunCell(), + CodeActActionParserAgentDelegate(), + CoActActionParserGlobalPlan(), + ] + self.default_parser = CodeActActionParserMessage() + + def parse_response(self, response) -> str: + action = response.choices[0].message.content + if action is None: + return '' + for action_suffix in [ + 'bash', + 'ipython', + 'browse', + 'global_plan', + 'decide', + 'revise', + 'overrule', + 'collation', + ]: + if ( + f'<execute_{action_suffix}>' in action + and f'</execute_{action_suffix}>' not in action + ): + action += f'</execute_{action_suffix}>' + return action + + +class CoActActionParserGlobalPlan(ActionParser): + """Parser action: + - AgentDelegateAction(agent, inputs) - delegate action for (sub)task + """ + + def __init__( + self, + ): + self.global_plan = None + + def check_condition(self, action_str: str) -> bool: + self.global_plan = re.search( + r'<execute_global_plan>(.*)</execute_global_plan>', action_str, re.DOTALL + ) + return self.global_plan is not None + + def parse(self, action_str: str) -> Action: + assert ( + self.global_plan is not None + ), 'self.global_plan should not be None when parse is called' + # thought = action_str.replace(self.global_plan.group(0), '').strip() + global_plan_actions = self.global_plan.group(1).strip() + return AgentDelegateAction( + agent='CoActExecutorAgent', + inputs={'task': global_plan_actions}, + action_suffix='global_plan', + ) # FIXME: check to use a more proper key e.g. 
'global_plan' instead of 'task' diff --git a/agenthub/coact_agent/planner/planner_agent.py b/agenthub/coact_agent/planner/planner_agent.py new file mode 100644 index 00000000000..cd0e6c434f5 --- /dev/null +++ b/agenthub/coact_agent/planner/planner_agent.py @@ -0,0 +1,22 @@ +import os + +from agenthub.coact_agent.planner.action_parser import PlannerResponseParser +from agenthub.codeact_agent.codeact_agent import CodeActAgent +from openhands.core.config import AgentConfig +from openhands.llm.llm import LLM +from openhands.runtime.plugins.agent_skills import AgentSkillsRequirement +from openhands.utils.prompt import PromptManager + + +class GlobalPlannerAgent(CodeActAgent): + VERSION = '1.0' + + def __init__(self, llm: LLM, config: AgentConfig) -> None: + super().__init__(llm, config) + + self.action_parser = PlannerResponseParser() + self.prompt_manager = PromptManager( + prompt_dir=os.path.join(os.path.dirname(__file__)), + agent_skills_docs=AgentSkillsRequirement.documentation, + micro_agent=self.micro_agent, + ) diff --git a/agenthub/coact_agent/planner/system_prompt.j2 b/agenthub/coact_agent/planner/system_prompt.j2 new file mode 100644 index 00000000000..07aa489262e --- /dev/null +++ b/agenthub/coact_agent/planner/system_prompt.j2 @@ -0,0 +1,59 @@ +{% set MINIMAL_SYSTEM_PREFIX %} +You are an autonomous intelligent agent playing the role of a strategic leader in a multi-tier task execution structure, tasked with performing programming tasks. You will be given coding-related tasks. Your responsibility is to provide high-level, strategic plans with tasks that can be broken down into smaller tasks by the local agent. + +Here's the information you'll have: +* The user's objective: This is the task you're trying to complete. +* The user's codebase: This is what you need to navigate and use to define the plan. + +You can use a Python environment with , e.g.: + +print("Hello World!") + + +The agent can execute bash commands wrapped with , e.g. ls . 
+ +The agent is not allowed to run interactive commands. For commands that may run indefinitely, +the output should be redirected to a file and the command run in the background, e.g. python3 app.py > server.log 2>&1 & + +If a command execution result says "Command timed out. Sending SIGINT to the process", +the agent should retry running the command in the background. + +As a planner agent, there are some additional actions that you can use to communicate and delegate tasks to the local agent: +- ``: Construct a multi-stage global plan, providing separate subtask descriptions and expected states for each phase. +- ``: Based on the description in the request for a new global plan submitted by the local agent, decide whether to agree to the re-planning. If so, the next action is to revise; if not, the next action is to overrule. +- ``: Facing the local agent's request, you chose to revise your previously made global plan. Please reconsider the characteristics of the task based on the description in the request and make a new global plan. +- ``: Facing the local agent's request, you believe your previous global plan is correct, so you refuse to adjust it and overrule the local agent's request. You believe they should adjust their local plan instead. Please give them suggestions for modifications. +{% endset %} +{% set PIP_INSTALL_PREFIX %} +The agent can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. +{% endset %} +{% set SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + PIP_INSTALL_PREFIX %} +{% set COMMAND_DOCS %} +Apart from the standard Python library, the agent can also use the following functions (already imported) in environment: +{{ agent_skills_docs }} +IMPORTANT: +- `open_file` only returns the first 100 lines of the file by default! 
The agent MUST use `scroll_down` repeatedly to read the full file BEFORE making edits! +- The agent shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the agent would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code! +- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +- Any code issued should be less than 50 lines to avoid context being cut off! +- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content! +- For `edit_file_by_replace` NEVER provide empty parameters! +- For `edit_file_by_replace` the file must be read fully before any replacements! +{% endset %} +{% set SYSTEM_SUFFIX %} +Responses should be concise. +The agent should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , or per response, unless the agent is finished with the task or needs more input or action from the user in order to proceed. +If the agent is finished with the task you MUST include in your response. +IMPORTANT: Execute code using , or whenever possible. +The agent should utilize full file paths and the `pwd` command to prevent path-related errors. +The agent must avoid apologies and thanks in its responses. +The planner agent must NOT perform the plan by itself and must delegate the plan to the local agent using . +The planner agent can only use to browse the Internet if it's required to come up with the plan. To delegate the plan, please only use . +After the local agent successfully completes the plan without any issues, you can collate the result and if it is good, you can exit. 
+ +{% endset %} +{# Combine all parts without newlines between them #} +{{ SYSTEM_PREFIX -}} +{{- COMMAND_DOCS -}} +{{- SYSTEM_SUFFIX }} diff --git a/agenthub/coact_agent/planner/user_prompt.j2 b/agenthub/coact_agent/planner/user_prompt.j2 new file mode 100644 index 00000000000..bae6bfc18cf --- /dev/null +++ b/agenthub/coact_agent/planner/user_prompt.j2 @@ -0,0 +1,231 @@ +{% set DEFAULT_EXAMPLE %} +--- START OF EXAMPLE --- + +USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000. + +ASSISTANT: +Sure! Let me create a Python file `app.py`: + +create_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (1 lines total)] +(this is the beginning of the file) +1| +(this is the end of the file) +[File app.py created.] + +ASSISTANT: +Now I will write the Python code for starting a web server and save it to the file `app.py`: + +EDITED_CODE="""from flask import Flask +app = Flask(__name__) + +@app.route('/') +def index(): + numbers = list(range(1, 11)) + return str(numbers) + +if __name__ == '__main__': + app.run(port=5000)""" + +insert_content_at_line( + 'app.py', + 1, + EDITED_CODE, +) + + +USER: +OBSERVATION: +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +(this is the end of the file) +[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + +ASSISTANT: +I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: + +python3 app.py > server.log 2>&1 & + + +USER: +OBSERVATION: +[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 + +ASSISTANT: Looks like the server is running with PID 121 then crashed. 
Let me check the server log: +<execute_bash> +cat server.log +</execute_bash> + +USER: +OBSERVATION: +Traceback (most recent call last): + File "/workspace/app.py", line 2, in <module> + from flask import Flask +ModuleNotFoundError: No module named 'flask' + +ASSISTANT: +It seems that Flask is not installed. Let me install Flask for you: +<execute_bash> +pip install flask +</execute_bash> + +USER: +OBSERVATION: +Defaulting to user installation because normal site-packages is not writeable +Collecting flask + Using cached flask-3.0.3-py3-none-any.whl (101 kB) +Collecting blinker>=1.6.2 + Using cached blinker-1.7.0-py3-none-any.whl (13 kB) +Collecting Werkzeug>=3.0.0 + Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB) +Collecting click>=8.1.3 + Using cached click-8.1.7-py3-none-any.whl (97 kB) +Collecting itsdangerous>=2.1.2 + Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB) +Requirement already satisfied: Jinja2>=3.1.2 in /home/openhands/.local/lib/python3.10/site-packages (from flask) (3.1.3) +Requirement already satisfied: MarkupSafe>=2.0 in /home/openhands/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5) +Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask +Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0 + +ASSISTANT: +Now that Flask is installed, let me run the Python file again: +<execute_bash> +python3 app.py > server.log 2>&1 & +</execute_bash> + +USER: +OBSERVATION: +[1] 124 + +ASSISTANT: +Let me check the server log again: +<execute_bash> +cat server.log +</execute_bash> + +USER: +OBSERVATION: +* Serving Flask app 'app' + * Debug mode: off +WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. + * Running on http://127.0.0.1:5000 +Press CTRL+C to quit + +ASSISTANT: +The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! 
+ +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure! Let me browse the server's homepage at http://127.0.0.1:5000: +<execute_browse> +Get the content on "http://127.0.0.1:5000" +</execute_browse> + +USER: +OBSERVATION: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + +USER: Now kill the server, make it display the numbers in a table format. + +ASSISTANT: +Sure! Let me stop the server first: +<execute_bash> +kill 124 +</execute_bash> + +USER: +OBSERVATION: +[1]+ Terminated python3 app.py > server.log 2>&1 + +ASSISTANT: +The server has been stopped. Let me open the Python file and modify it. +<execute_ipython> +open_file('app.py') +</execute_ipython> + +USER: +[File: /workspace/app.py (10 lines total)] +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +(this is the end of the file) + +ASSISTANT: +I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: +<execute_ipython> +edit_file_by_replace( + 'app.py', + to_replace=" return str(numbers)", + new_content=" return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'", +) +</execute_ipython>
+ +USER: +Observation: +[File: /workspace/app.py (10 lines total after edit)] +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +(this is the end of the file) +[File updated (edited at line 7). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + +ASSISTANT: +Running the updated file: +<execute_bash> +python3 app.py > server.log 2>&1 & +</execute_bash> + +USER: +Observation: +[1] 126 + +ASSISTANT: +The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests! +<finish></finish> + +--- END OF EXAMPLE --- +{% endset %} +Here is an example of how you can interact with the environment for task solving: +{{ DEFAULT_EXAMPLE }} +{% if micro_agent %} +--- BEGIN OF GUIDELINE --- +The following information may assist you in completing your task: + +{{ micro_agent }} +--- END OF GUIDELINE --- +{% endif %} + +NOW, LET'S START! diff --git a/agenthub/codeact_agent/action_parser.py b/agenthub/codeact_agent/action_parser.py index 96d9245afb5..0db7b93ae99 100644 --- a/agenthub/codeact_agent/action_parser.py +++ b/agenthub/codeact_agent/action_parser.py @@ -70,7 +70,7 @@ def parse(self, action_str: str) -> Action: self.finish_command is not None ), 'self.finish_command should not be None when parse is called' thought = action_str.replace(self.finish_command.group(0), '').strip() - return AgentFinishAction(thought=thought) + return AgentFinishAction(thought=thought, outputs={'content': thought}) class CodeActActionParserCmdRun(ActionParser): diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py index ea45068e3f9..937ce304486 100644 --- a/agenthub/codeact_agent/codeact_agent.py +++ b/agenthub/codeact_agent/codeact_agent.py @@ -102,7 +102,7 @@ def action_to_str(self, action: Action) -> str: elif isinstance(action, IPythonRunCellAction): return f'{action.thought}\n\n{action.code}\n' elif isinstance(action, AgentDelegateAction): - return 
f'{action.thought}\n\n{action.inputs["task"]}\n' + return f'{action.thought}\n\n{action.inputs["task"]}\n' elif isinstance(action, MessageAction): return action.content elif isinstance(action, AgentFinishAction) and action.source == 'agent': @@ -220,6 +220,11 @@ def step(self, state: State) -> Action: return self.action_parser.parse(response) def _get_messages(self, state: State) -> list[Message]: + delegated_task = '' + if state.inputs.get('task') is not None: + # CodeActAgent is delegated a task + delegated_task = state.inputs['task'] + messages: list[Message] = [ Message( role='system', @@ -234,7 +239,9 @@ def _get_messages(self, state: State) -> list[Message]: role='user', content=[ TextContent( - text=self.prompt_manager.initial_user_message, + text=self.prompt_manager.initial_user_message + + '\n' + + delegated_task, cache_prompt=self.llm.is_caching_prompt_active(), # if the user asks the same query, ) ], diff --git a/frontend/src/components/Workspace.tsx b/frontend/src/components/Workspace.tsx index 4d4d5ecf5cf..3d9e1f53080 100644 --- a/frontend/src/components/Workspace.tsx +++ b/frontend/src/components/Workspace.tsx @@ -25,6 +25,8 @@ function Workspace() { const extraTabsMap: { [key: string]: TabOption[] } = { CodeActAgent: [TabOption.JUPYTER], PlannerAgent: [TabOption.PLANNER], + CoActPlannerAgent: [TabOption.JUPYTER], + CoActExecutorAgent: [TabOption.JUPYTER], }; const extraTabs = extraTabsMap[AGENT] || []; const showTabs = [...baseTabs, ...extraTabs]; diff --git a/openhands/events/action/agent.py b/openhands/events/action/agent.py index f49f573ed69..2eb2368a9c8 100644 --- a/openhands/events/action/agent.py +++ b/openhands/events/action/agent.py @@ -74,7 +74,8 @@ class AgentDelegateAction(Action): inputs: dict thought: str = '' action: str = ActionType.DELEGATE + action_suffix: str = 'browse' @property def message(self) -> str: - return f"I'm asking {self.agent} for help with this task." + return f"The task to delegate is: {self.inputs['task']}. 
I'm asking {self.agent} for help with this task." diff --git a/openhands/events/observation/browse.py b/openhands/events/observation/browse.py index a337b5f260b..5ddd40a12e4 100644 --- a/openhands/events/observation/browse.py +++ b/openhands/events/observation/browse.py @@ -38,6 +38,6 @@ def __str__(self) -> str: f'Last browser action: {self.last_browser_action}\n' f'Last browser action error: {self.last_browser_action_error}\n' f'Focused element bid: {self.focused_element_bid}\n' - f'axTree: {self.axtree_object}\n' - f'CONTENT: {self.content}\n' + # f'axTree: {self.axtree_object}\n' # too long + f'CONTENT: {self.content if len(self.content) < 1000 else self.content[:1000]}\n' )