diff --git a/agenthub/coact_agent/__init__.py b/agenthub/coact_agent/__init__.py
index 4f25bf07832..be2273f2d69 100644
--- a/agenthub/coact_agent/__init__.py
+++ b/agenthub/coact_agent/__init__.py
@@ -1,6 +1,10 @@
+from agenthub.coact_agent.executor.executor_agent import (
+ LocalExecutorAgent as CoActExecutorAgent,
+)
from agenthub.coact_agent.planner.planner_agent import (
GlobalPlannerAgent as CoActPlannerAgent,
)
from openhands.controller.agent import Agent
Agent.register('CoActPlannerAgent', CoActPlannerAgent)
+Agent.register('CoActExecutorAgent', CoActExecutorAgent)
diff --git a/agenthub/coact_agent/executor/action_parser.py b/agenthub/coact_agent/executor/action_parser.py
new file mode 100644
index 00000000000..752f940e841
--- /dev/null
+++ b/agenthub/coact_agent/executor/action_parser.py
@@ -0,0 +1,39 @@
+from agenthub.codeact_agent.action_parser import (
+ CodeActActionParserAgentDelegate,
+ CodeActActionParserCmdRun,
+ CodeActActionParserFinish,
+ CodeActActionParserIPythonRunCell,
+ CodeActActionParserMessage,
+ CodeActResponseParser,
+)
+
+
+class ExecutorResponseParser(CodeActResponseParser):
+    """Parser for executor-agent LLM responses. Parser action:
+    - CmdRunAction(command) - bash command to run
+    - IPythonRunCellAction(code) - IPython code to run
+    - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
+    - MessageAction(content) - Message action to run (e.g. ask for clarification)
+    - AgentFinishAction() - end the interaction
+    """
+
+    def __init__(self):
+        # The item order in self.action_parsers is significant; edit it with care.
+        super().__init__()
+        self.action_parsers = [
+            CodeActActionParserFinish(),
+            CodeActActionParserCmdRun(),
+            CodeActActionParserIPythonRunCell(),
+            CodeActActionParserAgentDelegate(),
+            # TODO: additional parsers
+        ]
+        self.default_parser = CodeActActionParserMessage()
+
+    def parse_response(self, response) -> str:
+        """Return the reply text ('' if None), closing an unterminated execute tag."""
+        action = response.choices[0].message.content or ''
+        for lang in ['bash', 'ipython', 'browse']:
+            open_tag, close_tag = f'<execute_{lang}>', f'</execute_{lang}>'
+            if open_tag in action and close_tag not in action:
+                action += close_tag
+        return action
diff --git a/agenthub/coact_agent/executor/executor_agent.py b/agenthub/coact_agent/executor/executor_agent.py
new file mode 100644
index 00000000000..ffc2cf86b39
--- /dev/null
+++ b/agenthub/coact_agent/executor/executor_agent.py
@@ -0,0 +1,12 @@
+from agenthub.coact_agent.executor.action_parser import ExecutorResponseParser
+from agenthub.codeact_agent.codeact_agent import CodeActAgent
+from openhands.core.config import AgentConfig
+from openhands.llm.llm import LLM
+
+
+class LocalExecutorAgent(CodeActAgent):  # CoAct executor agent; a CodeActAgent subclass
+ VERSION = '1.0'
+
+ def __init__(self, llm: LLM, config: AgentConfig) -> None:
+ super().__init__(llm, config)  # run the base CodeActAgent initialisation first
+ self.action_parser = ExecutorResponseParser()  # then use the executor-specific response parser
diff --git a/agenthub/coact_agent/executor/system_prompt.j2 b/agenthub/coact_agent/executor/system_prompt.j2
new file mode 100644
index 00000000000..4454df1ca32
--- /dev/null
+++ b/agenthub/coact_agent/executor/system_prompt.j2
@@ -0,0 +1,47 @@
+{% set MINIMAL_SYSTEM_PREFIX %}
+A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed answers to the user's questions.
+The assistant can use a Python environment with <execute_ipython>, e.g.:
+<execute_ipython>
+print("Hello World!")
+</execute_ipython>
+The assistant can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
+The assistant is not allowed to run interactive commands. For commands that may run indefinitely,
+the output should be redirected to a file and the command run in the background, e.g. python3 app.py > server.log 2>&1 &
+If a command execution result says "Command timed out. Sending SIGINT to the process",
+the assistant should retry running the command in the background.
+{% endset %}
+{% set BROWSING_PREFIX %}
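+The assistant can browse the Internet with <execute_browse> and </execute_browse>.
+For example, <execute_browse> Tell me the usa's president using google search </execute_browse>.
+Or <execute_browse> Tell me what is in http://example.com </execute_browse>.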
+{% endset %}
+{% set PIP_INSTALL_PREFIX %}
+The assistant can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them.
+{% endset %}
+{% set SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + BROWSING_PREFIX + PIP_INSTALL_PREFIX %}
+{% set COMMAND_DOCS %}
+Apart from the standard Python library, the assistant can also use the following functions (already imported) in <execute_ipython> environment:
+{{ agent_skills_docs }}
+IMPORTANT:
+- `open_file` only returns the first 100 lines of the file by default! The assistant MUST use `scroll_down` repeatedly to read the full file BEFORE making edits!
+- The assistant shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code!
+- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
+- Any code issued should be less than 50 lines to avoid context being cut off!
+- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content!
+- For `edit_file_by_replace` NEVER provide empty parameters!
+- For `edit_file_by_replace` the file must be read fully before any replacements!
+{% endset %}
+{% set SYSTEM_SUFFIX %}
+Responses should be concise.
+The assistant should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block.
+Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the assistant is finished with the task or needs more input or action from the user in order to proceed.
+If the assistant is finished with the task you MUST include <finish></finish> in your response.
+IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible.
+The assistant should utilize full file paths and the `pwd` command to prevent path-related errors.
+The assistant must avoid apologies and thanks in its responses.
+
+{% endset %}
+{# Combine all parts without newlines between them #}
+{{ SYSTEM_PREFIX -}}
+{{- COMMAND_DOCS -}}
+{{- SYSTEM_SUFFIX }}
diff --git a/agenthub/coact_agent/executor/user_prompt.j2 b/agenthub/coact_agent/executor/user_prompt.j2
new file mode 100644
index 00000000000..bae6bfc18cf
--- /dev/null
+++ b/agenthub/coact_agent/executor/user_prompt.j2
@@ -0,0 +1,231 @@
+{% set DEFAULT_EXAMPLE %}
+--- START OF EXAMPLE ---
+
+USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.
+
+ASSISTANT:
+Sure! Let me create a Python file `app.py`:
+<execute_ipython>
+create_file('app.py')
+</execute_ipython>
+
+USER:
+OBSERVATION:
+[File: /workspace/app.py (1 lines total)]
+(this is the beginning of the file)
+1|
+(this is the end of the file)
+[File app.py created.]
+
+ASSISTANT:
+Now I will write the Python code for starting a web server and save it to the file `app.py`:
+<execute_ipython>
+EDITED_CODE="""from flask import Flask
+app = Flask(__name__)
+
+@app.route('/')
+def index():
+    numbers = list(range(1, 11))
+    return str(numbers)
+
+if __name__ == '__main__':
+    app.run(port=5000)"""
+
+insert_content_at_line(
+  'app.py',
+  1,
+  EDITED_CODE,
+)
+</execute_ipython>
+
+USER:
+OBSERVATION:
+(this is the beginning of the file)
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6| numbers = list(range(1, 11))
+7| return str(numbers)
+8|
+9|if __name__ == '__main__':
+10| app.run(port=5000)
+(this is the end of the file)
+[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+ASSISTANT:
+I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
+<execute_bash>
+python3 app.py > server.log 2>&1 &
+</execute_bash>
+
+USER:
+OBSERVATION:
+[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1
+
+ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
+<execute_bash>
+cat server.log
+</execute_bash>
+
+USER:
+OBSERVATION:
+Traceback (most recent call last):
+  File "/workspace/app.py", line 2, in <module>
+ from flask import Flask
+ModuleNotFoundError: No module named 'flask'
+
+ASSISTANT:
+It seems that Flask is not installed. Let me install Flask for you:
+<execute_bash>
+pip install flask
+</execute_bash>
+
+USER:
+OBSERVATION:
+Defaulting to user installation because normal site-packages is not writeable
+Collecting flask
+ Using cached flask-3.0.3-py3-none-any.whl (101 kB)
+Collecting blinker>=1.6.2
+ Using cached blinker-1.7.0-py3-none-any.whl (13 kB)
+Collecting Werkzeug>=3.0.0
+ Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB)
+Collecting click>=8.1.3
+ Using cached click-8.1.7-py3-none-any.whl (97 kB)
+Collecting itsdangerous>=2.1.2
+ Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
+Requirement already satisfied: Jinja2>=3.1.2 in /home/openhands/.local/lib/python3.10/site-packages (from flask) (3.1.3)
+Requirement already satisfied: MarkupSafe>=2.0 in /home/openhands/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5)
+Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
+Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0
+
+ASSISTANT:
+Now that Flask is installed, let me run the Python file again:
+<execute_bash>
+python3 app.py > server.log 2>&1 &
+</execute_bash>
+
+USER:
+OBSERVATION:
+[1] 124
+
+ASSISTANT:
+Let me check the server log again:
+<execute_bash>
+cat server.log
+</execute_bash>
+
+USER:
+OBSERVATION:
+* Serving Flask app 'app'
+ * Debug mode: off
+WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
+ * Running on http://127.0.0.1:5000
+Press CTRL+C to quit
+
+ASSISTANT:
+The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
+
+USER: Now browse the newly started server's homepage and show me the content.
+
+ASSISTANT:
+Sure! Let me browse the server's homepage at http://127.0.0.1:5000:
+<execute_browse>
+Get the content on "http://127.0.0.1:5000"
+</execute_browse>
+
+USER:
+OBSERVATION:
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+ASSISTANT:
+The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask!
+
+USER: Now kill the server, make it display the numbers in a table format.
+
+ASSISTANT:
+Sure! Let me stop the server first:
+<execute_bash>
+kill 124
+</execute_bash>
+
+USER:
+OBSERVATION:
+[1]+ Terminated python3 app.py > server.log 2>&1
+
+ASSISTANT:
+The server has been stopped. Let me open the Python file and modify it.
+<execute_ipython>
+open_file('app.py')
+</execute_ipython>
+
+USER:
+[File: /workspace/app.py (10 lines total)]
+(this is the beginning of the file)
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6| numbers = list(range(1, 11))
+7| return str(numbers)
+8|
+9|if __name__ == '__main__':
+10| app.run(port=5000)
+(this is the end of the file)
+
+ASSISTANT:
+I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file:
+<execute_ipython>
+edit_file_by_replace(
+  'app.py',
+  to_replace="    return str(numbers)",
+  new_content="    return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'",
+)
+</execute_ipython>
+
+USER:
+Observation:
+[File: /workspace/app.py (10 lines total after edit)]
+(this is the beginning of the file)
+1|from flask import Flask
+2|app = Flask(__name__)
+3|
+4|@app.route('/')
+5|def index():
+6| numbers = list(range(1, 11))
+7|    return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'
+8|
+9|if __name__ == '__main__':
+10| app.run(port=5000)
+(this is the end of the file)
+[File updated (edited at line 7). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
+
+ASSISTANT:
+Running the updated file:
+<execute_bash>
+python3 app.py > server.log 2>&1 &
+</execute_bash>
+
+USER:
+Observation:
+[1] 126
+
+ASSISTANT:
+The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests!
+<finish></finish>
+
+--- END OF EXAMPLE ---
+{% endset %}
+Here is an example of how you can interact with the environment for task solving:
+{{ DEFAULT_EXAMPLE }}
+{% if micro_agent %}
+--- BEGIN OF GUIDELINE ---
+The following information may assist you in completing your task:
+
+{{ micro_agent }}
+--- END OF GUIDELINE ---
+{% endif %}
+
+NOW, LET'S START!
diff --git a/agenthub/coact_agent/planner/action_parser.py b/agenthub/coact_agent/planner/action_parser.py
index 5aec357c687..55352766179 100644
--- a/agenthub/coact_agent/planner/action_parser.py
+++ b/agenthub/coact_agent/planner/action_parser.py
@@ -1,17 +1,21 @@
import re
-from openhands.controller.action_parser import ActionParser, ResponseParser
+from agenthub.codeact_agent.action_parser import (
+ CodeActActionParserAgentDelegate,
+ CodeActActionParserCmdRun,
+ CodeActActionParserFinish,
+ CodeActActionParserIPythonRunCell,
+ CodeActActionParserMessage,
+ CodeActResponseParser,
+)
+from openhands.controller.action_parser import ActionParser
from openhands.events.action import (
Action,
AgentDelegateAction,
- AgentFinishAction,
- CmdRunAction,
- IPythonRunCellAction,
- MessageAction,
)
-class PlannerResponseParser(ResponseParser):
+class PlannerResponseParser(CodeActResponseParser):
"""Parser action:
- CmdRunAction(command) - bash command to run
- IPythonRunCellAction(code) - IPython code to run
@@ -28,14 +32,10 @@ def __init__(self):
CodeActActionParserCmdRun(),
CodeActActionParserIPythonRunCell(),
CodeActActionParserAgentDelegate(),
- CodeActActionParserGlobalPlan(),
+ CoActActionParserGlobalPlan(),
]
self.default_parser = CodeActActionParserMessage()
- def parse(self, response) -> Action:
- action_str = self.parse_response(response)
- return self.parse_action(action_str)
-
def parse_response(self, response) -> str:
action = response.choices[0].message.content
if action is None:
@@ -57,14 +57,8 @@ def parse_response(self, response) -> str:
action += f''
return action
- def parse_action(self, action_str: str) -> Action:
- for action_parser in self.action_parsers:
- if action_parser.check_condition(action_str):
- return action_parser.parse(action_str)
- return self.default_parser.parse(action_str)
-
-class CodeActActionParserGlobalPlan(ActionParser):
+class CoActActionParserGlobalPlan(ActionParser):
"""Parser action:
- AgentDelegateAction(agent, inputs) - delegate action for (sub)task
"""
@@ -87,135 +81,7 @@ def parse(self, action_str: str) -> Action:
# thought = action_str.replace(self.global_plan.group(0), '').strip()
global_plan_actions = self.global_plan.group(1).strip()
return AgentDelegateAction(
- agent='CodeActAgent',
+ agent='CoActExecutorAgent',
inputs={'task': global_plan_actions},
action_suffix='global_plan',
) # FIXME: check to use a more proper key e.g. 'global_plan' instead of 'task'
-
-
-class CodeActActionParserFinish(ActionParser):
- """Parser action:
- - AgentFinishAction() - end the interaction
- """
-
- def __init__(
- self,
- ):
- self.finish_command = None
-
- def check_condition(self, action_str: str) -> bool:
- self.finish_command = re.search(r'.*', action_str, re.DOTALL)
- return self.finish_command is not None
-
- def parse(self, action_str: str) -> Action:
- assert (
- self.finish_command is not None
- ), 'self.finish_command should not be None when parse is called'
- thought = action_str.replace(self.finish_command.group(0), '').strip()
- return AgentFinishAction(thought=thought)
-
-
-class CodeActActionParserCmdRun(ActionParser):
- """Parser action:
- - CmdRunAction(command) - bash command to run
- - AgentFinishAction() - end the interaction
- """
-
- def __init__(
- self,
- ):
- self.bash_command = None
-
- def check_condition(self, action_str: str) -> bool:
- self.bash_command = re.search(
- r'(.*?)', action_str, re.DOTALL
- )
- return self.bash_command is not None
-
- def parse(self, action_str: str) -> Action:
- assert (
- self.bash_command is not None
- ), 'self.bash_command should not be None when parse is called'
- thought = action_str.replace(self.bash_command.group(0), '').strip()
- # a command was found
- command_group = self.bash_command.group(1).strip()
- if command_group.strip() == 'exit':
- return AgentFinishAction(thought=thought)
- return CmdRunAction(command=command_group, thought=thought)
-
-
-class CodeActActionParserIPythonRunCell(ActionParser):
- """Parser action:
- - IPythonRunCellAction(code) - IPython code to run
- """
-
- def __init__(
- self,
- ):
- self.python_code = None
- self.jupyter_kernel_init_code: str = 'from agentskills import *'
-
- def check_condition(self, action_str: str) -> bool:
- self.python_code = re.search(
- r'(.*?)', action_str, re.DOTALL
- )
- return self.python_code is not None
-
- def parse(self, action_str: str) -> Action:
- assert (
- self.python_code is not None
- ), 'self.python_code should not be None when parse is called'
- code_group = self.python_code.group(1).strip()
- thought = action_str.replace(self.python_code.group(0), '').strip()
- return IPythonRunCellAction(
- code=code_group,
- thought=thought,
- kernel_init_code=self.jupyter_kernel_init_code,
- )
-
-
-class CodeActActionParserAgentDelegate(ActionParser):
- """Parser action:
- - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
- """
-
- def __init__(
- self,
- ):
- self.agent_delegate = None
-
- def check_condition(self, action_str: str) -> bool:
- self.agent_delegate = re.search(
- r'(.*)', action_str, re.DOTALL
- )
- return self.agent_delegate is not None
-
- def parse(self, action_str: str) -> Action:
- assert (
- self.agent_delegate is not None
- ), 'self.agent_delegate should not be None when parse is called'
- thought = action_str.replace(self.agent_delegate.group(0), '').strip()
- browse_actions = self.agent_delegate.group(1).strip()
- task = f'{thought}. I should start with: {browse_actions}'
- return AgentDelegateAction(
- agent='BrowsingAgent', inputs={'task': task}, action_suffix='browse'
- )
-
-
-class CodeActActionParserMessage(ActionParser):
- """Parser action:
- - MessageAction(content) - Message action to run (e.g. ask for clarification)
- """
-
- def __init__(
- self,
- ):
- pass
-
- def check_condition(self, action_str: str) -> bool:
- # We assume the LLM is GOOD enough that when it returns pure natural language
- # it wants to talk to the user
- return True
-
- def parse(self, action_str: str) -> Action:
- return MessageAction(content=action_str, wait_for_response=True)
diff --git a/frontend/src/components/Workspace.tsx b/frontend/src/components/Workspace.tsx
index 90034326d0e..3d9e1f53080 100644
--- a/frontend/src/components/Workspace.tsx
+++ b/frontend/src/components/Workspace.tsx
@@ -26,6 +26,7 @@ function Workspace() {
CodeActAgent: [TabOption.JUPYTER],
PlannerAgent: [TabOption.PLANNER],
CoActPlannerAgent: [TabOption.JUPYTER],
+ CoActExecutorAgent: [TabOption.JUPYTER],
};
const extraTabs = extraTabsMap[AGENT] || [];
const showTabs = [...baseTabs, ...extraTabs];