From 1dd8c3fdb100e13068d83dc5de9fc65529bca464 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:15:09 -0400
Subject: [PATCH 01/31] prevent evenstream from looping back on itself

---
 openhands/events/stream.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/openhands/events/stream.py b/openhands/events/stream.py
index 1e4c3b9d539..cda1578295a 100644
--- a/openhands/events/stream.py
+++ b/openhands/events/stream.py
@@ -138,6 +138,10 @@ def add_event(self, event: Event, source: EventSource):
             asyncio.run(self.async_add_event(event, source))
 
     async def async_add_event(self, event: Event, source: EventSource):
+        if event._id is not None:  # type: ignore [attr-defined]
+            raise ValueError(
+                'Event already has an ID. It was probably added back to the EventStream from inside a handler, trigging a loop.'
+            )
         with self._lock:
             event._id = self._cur_id  # type: ignore [attr-defined]
             self._cur_id += 1

From ba3c558ff7cd2dfb51967857c1c61253d603734e Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:17:42 -0400
Subject: [PATCH 02/31] use hasattr

---
 openhands/events/stream.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/events/stream.py b/openhands/events/stream.py
index cda1578295a..b2368da2030 100644
--- a/openhands/events/stream.py
+++ b/openhands/events/stream.py
@@ -138,7 +138,7 @@ def add_event(self, event: Event, source: EventSource):
             asyncio.run(self.async_add_event(event, source))
 
     async def async_add_event(self, event: Event, source: EventSource):
-        if event._id is not None:  # type: ignore [attr-defined]
+        if hasattr(event, '_id') and event.id is not None:
             raise ValueError(
                 'Event already has an ID. It was probably added back to the EventStream from inside a handler, trigging a loop.'
             )

From 1b72a8966b406983c0ca9d45daf0355a364fcd62 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:30:56 -0400
Subject: [PATCH 03/31] refactor a bit

---
 evaluation/EDA/run_infer.py                   |  2 +-
 evaluation/agent_bench/run_infer.py           |  2 +-
 evaluation/aider_bench/run_infer.py           |  2 +-
 evaluation/biocoder/run_infer.py              |  2 +-
 evaluation/bird/run_infer.py                  |  2 +-
 evaluation/browsing_delegation/run_infer.py   |  2 +-
 evaluation/gaia/run_infer.py                  |  2 +-
 evaluation/gorilla/run_infer.py               |  2 +-
 evaluation/gpqa/run_infer.py                  |  2 +-
 evaluation/humanevalfix/run_infer.py          |  2 +-
 evaluation/integration_tests/run_infer.py     |  2 +-
 evaluation/logic_reasoning/run_infer.py       |  2 +-
 evaluation/miniwob/run_infer.py               |  2 +-
 evaluation/mint/run_infer.py                  |  2 +-
 evaluation/swe_bench/run_infer.py             |  8 ++--
 .../swe_bench/scripts/eval/compare_outputs.py |  0
 .../scripts/eval/convert_oh_output_to_md.py   |  0
 .../scripts/eval/summarize_outputs.py         |  0
 .../scripts/setup/compare_patch_filename.py   |  0
 evaluation/toolqa/run_infer.py                |  2 +-
 evaluation/webarena/run_infer.py              |  2 +-
 openhands/controller/agent_controller.py      | 38 ++++++++-----------
 openhands/controller/state/state.py           | 13 +++++--
 23 files changed, 44 insertions(+), 47 deletions(-)
 mode change 100755 => 100644 evaluation/swe_bench/scripts/eval/compare_outputs.py
 mode change 100755 => 100644 evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py
 mode change 100755 => 100644 evaluation/swe_bench/scripts/eval/summarize_outputs.py
 mode change 100755 => 100644 evaluation/swe_bench/scripts/setup/compare_patch_filename.py

diff --git a/evaluation/EDA/run_infer.py b/evaluation/EDA/run_infer.py
index 2c896939a75..c4f6c118411 100644
--- a/evaluation/EDA/run_infer.py
+++ b/evaluation/EDA/run_infer.py
@@ -158,7 +158,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'success': test_result,
             'final_message': final_message,
diff --git a/evaluation/agent_bench/run_infer.py b/evaluation/agent_bench/run_infer.py
index d6fcc62e079..e05c66e9135 100644
--- a/evaluation/agent_bench/run_infer.py
+++ b/evaluation/agent_bench/run_infer.py
@@ -283,7 +283,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'agent_answer': agent_answer,
             'final_answer': final_ans,
diff --git a/evaluation/aider_bench/run_infer.py b/evaluation/aider_bench/run_infer.py
index fa1bb9534a8..9b7f33d404c 100644
--- a/evaluation/aider_bench/run_infer.py
+++ b/evaluation/aider_bench/run_infer.py
@@ -261,7 +261,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result,
     )
     return output
diff --git a/evaluation/biocoder/run_infer.py b/evaluation/biocoder/run_infer.py
index 4535ccba4e4..a05a6aa83aa 100644
--- a/evaluation/biocoder/run_infer.py
+++ b/evaluation/biocoder/run_infer.py
@@ -311,7 +311,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result,
     )
     return output
diff --git a/evaluation/bird/run_infer.py b/evaluation/bird/run_infer.py
index adb498cd2eb..d81e873636c 100644
--- a/evaluation/bird/run_infer.py
+++ b/evaluation/bird/run_infer.py
@@ -440,7 +440,7 @@ def execute_sql(db_path, sql):
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result,
     )
     return output
diff --git a/evaluation/browsing_delegation/run_infer.py b/evaluation/browsing_delegation/run_infer.py
index c9fe2ebd18b..38c91cae18d 100644
--- a/evaluation/browsing_delegation/run_infer.py
+++ b/evaluation/browsing_delegation/run_infer.py
@@ -121,7 +121,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'query': instance.instruction,
             'action': last_delegate_action,
diff --git a/evaluation/gaia/run_infer.py b/evaluation/gaia/run_infer.py
index c02cd0aee73..d72a8a449ea 100644
--- a/evaluation/gaia/run_infer.py
+++ b/evaluation/gaia/run_infer.py
@@ -213,7 +213,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result,
     )
     return output
diff --git a/evaluation/gorilla/run_infer.py b/evaluation/gorilla/run_infer.py
index 873cb7f8969..9881cea88c9 100644
--- a/evaluation/gorilla/run_infer.py
+++ b/evaluation/gorilla/run_infer.py
@@ -121,7 +121,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'text': model_answer_raw,
             'correct': correct,
diff --git a/evaluation/gpqa/run_infer.py b/evaluation/gpqa/run_infer.py
index 8fd4034c9d5..81f09ef5223 100644
--- a/evaluation/gpqa/run_infer.py
+++ b/evaluation/gpqa/run_infer.py
@@ -302,7 +302,7 @@ def process_instance(
         metadata=metadata,
         history=state.history.compatibility_for_eval_history_pairs(),
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'result': test_result,
             'found_answers': found_answers,
diff --git a/evaluation/humanevalfix/run_infer.py b/evaluation/humanevalfix/run_infer.py
index 25fee65561f..ae445faaa4a 100644
--- a/evaluation/humanevalfix/run_infer.py
+++ b/evaluation/humanevalfix/run_infer.py
@@ -264,7 +264,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result,
     )
     return output
diff --git a/evaluation/integration_tests/run_infer.py b/evaluation/integration_tests/run_infer.py
index a530041f92f..95dbb447841 100644
--- a/evaluation/integration_tests/run_infer.py
+++ b/evaluation/integration_tests/run_infer.py
@@ -134,7 +134,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result.model_dump(),
     )
     return output
diff --git a/evaluation/logic_reasoning/run_infer.py b/evaluation/logic_reasoning/run_infer.py
index 5b7d35f2113..57e0ec9582f 100644
--- a/evaluation/logic_reasoning/run_infer.py
+++ b/evaluation/logic_reasoning/run_infer.py
@@ -256,7 +256,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result=test_result,
     )
     return output
diff --git a/evaluation/miniwob/run_infer.py b/evaluation/miniwob/run_infer.py
index 9c2aaf1e096..58fc9da6cc1 100644
--- a/evaluation/miniwob/run_infer.py
+++ b/evaluation/miniwob/run_infer.py
@@ -173,7 +173,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'reward': reward,
         },
diff --git a/evaluation/mint/run_infer.py b/evaluation/mint/run_infer.py
index 8017b194d8d..554940ebaad 100644
--- a/evaluation/mint/run_infer.py
+++ b/evaluation/mint/run_infer.py
@@ -212,7 +212,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'success': task_state.success if task_state else False,
         },
diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py
index 9ac1e0cf663..8ca66056e65 100644
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -402,10 +402,10 @@ def process_instance(
 
         # if fatal error, throw EvalError to trigger re-run
         if (
-            state.last_error
-            and 'fatal error during agent execution' in state.last_error
+            state.get_last_error()
+            and 'fatal error during agent execution' in state.get_last_error()
         ):
-            raise EvalException('Fatal error detected: ' + state.last_error)
+            raise EvalException('Fatal error detected: ' + state.get_last_error())
 
         # ======= THIS IS SWE-Bench specific =======
         # Get git patch
@@ -442,7 +442,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
     )
     return output
 
diff --git a/evaluation/swe_bench/scripts/eval/compare_outputs.py b/evaluation/swe_bench/scripts/eval/compare_outputs.py
old mode 100755
new mode 100644
diff --git a/evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py b/evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py
old mode 100755
new mode 100644
diff --git a/evaluation/swe_bench/scripts/eval/summarize_outputs.py b/evaluation/swe_bench/scripts/eval/summarize_outputs.py
old mode 100755
new mode 100644
diff --git a/evaluation/swe_bench/scripts/setup/compare_patch_filename.py b/evaluation/swe_bench/scripts/setup/compare_patch_filename.py
old mode 100755
new mode 100644
diff --git a/evaluation/toolqa/run_infer.py b/evaluation/toolqa/run_infer.py
index 5c2c5342278..c10d19fdf13 100644
--- a/evaluation/toolqa/run_infer.py
+++ b/evaluation/toolqa/run_infer.py
@@ -149,7 +149,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
     )
     return output
 
diff --git a/evaluation/webarena/run_infer.py b/evaluation/webarena/run_infer.py
index cfc2bdae493..d818f77307a 100644
--- a/evaluation/webarena/run_infer.py
+++ b/evaluation/webarena/run_infer.py
@@ -187,7 +187,7 @@ def process_instance(
         metadata=metadata,
         history=histories,
         metrics=metrics,
-        error=state.last_error if state and state.last_error else None,
+        error=state.get_last_error() if state and state.get_last_error() else None,
         test_result={
             'reward': reward,
         },
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 946f6a7d327..0e921508f9c 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -133,21 +133,14 @@ async def update_state_after_step(self):
         # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset()
         self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics)
 
-    async def report_error(self, message: str, exception: Exception | None = None):
-        """Reports an error to the user and sends the exception to the LLM next step, in the hope it can self-correct.
-
-        This method should be called for a particular type of errors, which have:
-        - a user-friendly message, which will be shown in the chat box. This should not be a raw exception message.
-        - an ErrorObservation that can be sent to the LLM by the user role, with the exception message, so it can self-correct next time.
-        """
-        self.state.last_error = message
-        if exception:
-            self.state.last_error += f': {exception}'
+    async def send_error_to_event_stream(
+        self, message: str, exception: Exception | None = None
+    ):
         detail = str(exception) if exception is not None else ''
         if exception is not None and isinstance(exception, litellm.AuthenticationError):
-            detail = 'Please check your credentials. Is your API key correct?'
+            detail += '\nPlease check your credentials. Is your API key correct?'
         self.event_stream.add_event(
-            ErrorObservation(f'{message}:{detail}'), EventSource.USER
+            ErrorObservation(f'{message}:{detail}'), EventSource.AGENT
         )
 
     async def start_step_loop(self):
@@ -164,7 +157,7 @@ async def start_step_loop(self):
                 traceback.print_exc()
                 logger.error(f'Error while running the agent: {e}')
                 logger.error(traceback.format_exc())
-                await self.report_error(
+                await self.send_error_to_event_stream(
                     'There was an unexpected error while running the agent', exception=e
                 )
                 await self.set_agent_state_to(AgentState.ERROR)
@@ -254,9 +247,6 @@ async def _handle_observation(self, observation: Observation):
             if self.state.agent_state == AgentState.ERROR:
                 self.state.metrics.merge(self.state.local_metrics)
         elif isinstance(observation, FatalErrorObservation):
-            await self.report_error(
-                'There was a fatal error during agent execution: ' + str(observation)
-            )
             await self.set_agent_state_to(AgentState.ERROR)
             self.state.metrics.merge(self.state.local_metrics)
 
@@ -443,7 +433,7 @@ async def _step(self) -> None:
         except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e:
             # report to the user
             # and send the underlying exception to the LLM for self-correction
-            await self.report_error(str(e))
+            await self.send_error_to_event_stream(str(e))
             return
 
         if action.runnable:
@@ -468,9 +458,9 @@ async def _step(self) -> None:
         logger.info(action, extra={'msg_type': 'ACTION'})
 
         if self._is_stuck():
-            # This need to go BEFORE report_error to sync metrics
+            # This need to go BEFORE send_error_to_event_stream to sync metrics
             await self.set_agent_state_to(AgentState.ERROR)
-            await self.report_error('Agent got stuck in a loop')
+            await self.send_error_to_event_stream('Agent got stuck in a loop')
 
     async def _delegate_step(self):
         """Executes a single step of the delegate agent."""
@@ -489,7 +479,9 @@ async def _delegate_step(self):
             self.delegate = None
             self.delegateAction = None
 
-            await self.report_error('Delegator agent encountered an error')
+            await self.send_error_to_event_stream(
+                'Delegator agent encountered an error'
+            )
         elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
             logger.info(
                 f'[Agent Controller {self.id}] Delegate agent has finished execution'
@@ -538,17 +530,17 @@ async def _handle_traffic_control(
         else:
             self.state.traffic_control_state = TrafficControlState.THROTTLING
             if self.headless_mode:
-                # This need to go BEFORE report_error to sync metrics
+                # This need to go BEFORE send_error_to_event_stream to sync metrics
                 await self.set_agent_state_to(AgentState.ERROR)
                 # set to ERROR state if running in headless mode
                 # since user cannot resume on the web interface
-                await self.report_error(
+                await self.send_error_to_event_stream(
                     f'Agent reached maximum {limit_type} in headless mode, task stopped. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}'
                 )
             else:
                 await self.set_agent_state_to(AgentState.PAUSED)
-                await self.report_error(
+                await self.send_error_to_event_stream(
                     f'Agent reached maximum {limit_type}, task paused. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
                     f'{TRAFFIC_CONTROL_REMINDER}'
diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py
index e14d44517a5..d204c9609ec 100644
--- a/openhands/controller/state/state.py
+++ b/openhands/controller/state/state.py
@@ -11,6 +11,7 @@
     MessageAction,
 )
 from openhands.events.action.agent import AgentFinishAction
+from openhands.events.observation import ErrorObservation, FatalErrorObservation
 from openhands.llm.metrics import Metrics
 from openhands.memory.history import ShortTermHistory
 from openhands.storage.files import FileStore
@@ -80,7 +81,6 @@ class State:
     history: ShortTermHistory = field(default_factory=ShortTermHistory)
     inputs: dict = field(default_factory=dict)
     outputs: dict = field(default_factory=dict)
-    last_error: str | None = None
     agent_state: AgentState = AgentState.LOADING
     resume_state: AgentState | None = None
     traffic_control_state: TrafficControlState = TrafficControlState.NORMAL
@@ -124,9 +124,6 @@ def restore_from_session(sid: str, file_store: FileStore) -> 'State':
         else:
             state.resume_state = None
 
-        # don't carry last_error anymore after restore
-        state.last_error = None
-
         # first state after restore
         state.agent_state = AgentState.LOADING
         return state
@@ -157,6 +154,14 @@ def __setstate__(self, state):
 
         # remove the restored data from the state if any
 
+    def get_last_error(self) -> str:
+        for event in self.history.get_events(reverse=True):
+            if isinstance(event, ErrorObservation) or isinstance(
+                event, FatalErrorObservation
+            ):
+                return event.content
+        return ''
+
     def get_current_user_intent(self):
         """Returns the latest user message and image(if provided) that appears after a FinishAction, or the first (the task) if nothing was finished yet."""
         last_user_message = None

From ed3d94499ab15ab98e0863d19acdcbe140d83e19 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:31:40 -0400
Subject: [PATCH 04/31] revert perms

---
 evaluation/swe_bench/scripts/eval/compare_outputs.py         | 0
 evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py | 0
 evaluation/swe_bench/scripts/eval/summarize_outputs.py       | 0
 evaluation/swe_bench/scripts/setup/compare_patch_filename.py | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 evaluation/swe_bench/scripts/eval/compare_outputs.py
 mode change 100644 => 100755 evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py
 mode change 100644 => 100755 evaluation/swe_bench/scripts/eval/summarize_outputs.py
 mode change 100644 => 100755 evaluation/swe_bench/scripts/setup/compare_patch_filename.py

diff --git a/evaluation/swe_bench/scripts/eval/compare_outputs.py b/evaluation/swe_bench/scripts/eval/compare_outputs.py
old mode 100644
new mode 100755
diff --git a/evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py b/evaluation/swe_bench/scripts/eval/convert_oh_output_to_md.py
old mode 100644
new mode 100755
diff --git a/evaluation/swe_bench/scripts/eval/summarize_outputs.py b/evaluation/swe_bench/scripts/eval/summarize_outputs.py
old mode 100644
new mode 100755
diff --git a/evaluation/swe_bench/scripts/setup/compare_patch_filename.py b/evaluation/swe_bench/scripts/setup/compare_patch_filename.py
old mode 100644
new mode 100755

From 57662b625819b47f7f1d5e4baf0c22261169279c Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:34:29 -0400
Subject: [PATCH 05/31] private method

---
 openhands/controller/agent_controller.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 0e921508f9c..11db85c33d7 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -133,7 +133,7 @@ async def update_state_after_step(self):
         # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset()
         self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics)
 
-    async def send_error_to_event_stream(
+    async def _send_error_to_event_stream(
         self, message: str, exception: Exception | None = None
     ):
         detail = str(exception) if exception is not None else ''
@@ -157,7 +157,7 @@ async def start_step_loop(self):
                 traceback.print_exc()
                 logger.error(f'Error while running the agent: {e}')
                 logger.error(traceback.format_exc())
-                await self.send_error_to_event_stream(
+                await self._send_error_to_event_stream(
                     'There was an unexpected error while running the agent', exception=e
                 )
                 await self.set_agent_state_to(AgentState.ERROR)
@@ -433,7 +433,7 @@ async def _step(self) -> None:
         except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e:
             # report to the user
             # and send the underlying exception to the LLM for self-correction
-            await self.send_error_to_event_stream(str(e))
+            await self._send_error_to_event_stream(str(e))
             return
 
         if action.runnable:
@@ -458,9 +458,9 @@ async def _step(self) -> None:
         logger.info(action, extra={'msg_type': 'ACTION'})
 
         if self._is_stuck():
-            # This need to go BEFORE send_error_to_event_stream to sync metrics
+            # This need to go BEFORE _send_error_to_event_stream to sync metrics
             await self.set_agent_state_to(AgentState.ERROR)
-            await self.send_error_to_event_stream('Agent got stuck in a loop')
+            await self._send_error_to_event_stream('Agent got stuck in a loop')
 
     async def _delegate_step(self):
         """Executes a single step of the delegate agent."""
@@ -479,7 +479,7 @@ async def _delegate_step(self):
             self.delegate = None
             self.delegateAction = None
 
-            await self.send_error_to_event_stream(
+            await self._send_error_to_event_stream(
                 'Delegator agent encountered an error'
             )
         elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
@@ -530,17 +530,17 @@ async def _handle_traffic_control(
         else:
             self.state.traffic_control_state = TrafficControlState.THROTTLING
             if self.headless_mode:
-                # This need to go BEFORE send_error_to_event_stream to sync metrics
+                # This need to go BEFORE _send_error_to_event_stream to sync metrics
                 await self.set_agent_state_to(AgentState.ERROR)
                 # set to ERROR state if running in headless mode
                 # since user cannot resume on the web interface
-                await self.send_error_to_event_stream(
+                await self._send_error_to_event_stream(
                     f'Agent reached maximum {limit_type} in headless mode, task stopped. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}'
                 )
             else:
                 await self.set_agent_state_to(AgentState.PAUSED)
-                await self.send_error_to_event_stream(
+                await self._send_error_to_event_stream(
                     f'Agent reached maximum {limit_type}, task paused. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
                     f'{TRAFFIC_CONTROL_REMINDER}'

From a7ce34e2692e1cce0ebb61f4b4b77d148c481615 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:45:25 -0400
Subject: [PATCH 06/31] async_add_event

---
 openhands/controller/agent_controller.py | 53 +++++++++++++-----------
 1 file changed, 28 insertions(+), 25 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 11db85c33d7..9550286c316 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -133,13 +133,18 @@ async def update_state_after_step(self):
         # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset()
         self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics)
 
-    async def _send_error_to_event_stream(
-        self, message: str, exception: Exception | None = None
+    async def _react_to_error(
+        self,
+        message: str,
+        exception: Exception | None = None,
+        new_state: AgentState | None = None,
     ):
+        if new_state is not None:
+            await self.set_agent_state_to(new_state)
         detail = str(exception) if exception is not None else ''
         if exception is not None and isinstance(exception, litellm.AuthenticationError):
             detail += '\nPlease check your credentials. Is your API key correct?'
-        self.event_stream.add_event(
+        await self.event_stream.async_add_event(
             ErrorObservation(f'{message}:{detail}'), EventSource.AGENT
         )
 
@@ -157,10 +162,11 @@ async def start_step_loop(self):
                 traceback.print_exc()
                 logger.error(f'Error while running the agent: {e}')
                 logger.error(traceback.format_exc())
-                await self._send_error_to_event_stream(
-                    'There was an unexpected error while running the agent', exception=e
+                await self._react_to_error(
+                    'There was an unexpected error while running the agent',
+                    exception=e,
+                    new_state=AgentState.ERROR,
                 )
-                await self.set_agent_state_to(AgentState.ERROR)
                 break
 
             await asyncio.sleep(0.1)
@@ -320,7 +326,9 @@ async def set_agent_state_to(self, new_state: AgentState):
             else:
                 confirmation_state = ActionConfirmationStatus.REJECTED
             self._pending_action.confirmation_state = confirmation_state  # type: ignore[attr-defined]
-            self.event_stream.add_event(self._pending_action, EventSource.AGENT)
+            await self.event_stream.async_add_event(
+                self._pending_action, EventSource.AGENT
+            )
 
         self.state.agent_state = new_state
         self.event_stream.add_event(
@@ -433,7 +441,7 @@ async def _step(self) -> None:
         except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e:
             # report to the user
             # and send the underlying exception to the LLM for self-correction
-            await self._send_error_to_event_stream(str(e))
+            await self._react_to_error(str(e))
             return
 
         if action.runnable:
@@ -452,15 +460,15 @@ async def _step(self) -> None:
                 == ActionConfirmationStatus.AWAITING_CONFIRMATION
             ):
                 await self.set_agent_state_to(AgentState.AWAITING_USER_CONFIRMATION)
-            self.event_stream.add_event(action, EventSource.AGENT)
+            await self.event_stream.async_add_event(action, EventSource.AGENT)
 
         await self.update_state_after_step()
         logger.info(action, extra={'msg_type': 'ACTION'})
 
         if self._is_stuck():
-            # This need to go BEFORE _send_error_to_event_stream to sync metrics
-            await self.set_agent_state_to(AgentState.ERROR)
-            await self._send_error_to_event_stream('Agent got stuck in a loop')
+            await self._react_to_error(
+                'Agent got stuck in a loop', new_state=AgentState.ERROR
+            )
 
     async def _delegate_step(self):
         """Executes a single step of the delegate agent."""
@@ -479,9 +487,7 @@ async def _delegate_step(self):
             self.delegate = None
             self.delegateAction = None
 
-            await self._send_error_to_event_stream(
-                'Delegator agent encountered an error'
-            )
+            await self._react_to_error('Delegator agent encountered an error')
         elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
             logger.info(
                 f'[Agent Controller {self.id}] Delegate agent has finished execution'
@@ -510,7 +516,7 @@ async def _delegate_step(self):
             # clean up delegate status
             self.delegate = None
             self.delegateAction = None
-            self.event_stream.add_event(obs, EventSource.AGENT)
+            await self.event_stream.async_add_event(obs, EventSource.AGENT)
         return
 
     async def _handle_traffic_control(
@@ -530,20 +536,17 @@ async def _handle_traffic_control(
         else:
             self.state.traffic_control_state = TrafficControlState.THROTTLING
             if self.headless_mode:
-                # This need to go BEFORE _send_error_to_event_stream to sync metrics
-                await self.set_agent_state_to(AgentState.ERROR)
-                # set to ERROR state if running in headless mode
-                # since user cannot resume on the web interface
-                await self._send_error_to_event_stream(
+                await self._react_to_error(
                     f'Agent reached maximum {limit_type} in headless mode, task stopped. '
-                    f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}'
+                    f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}',
+                    new_state=AgentState.ERROR,
                 )
             else:
-                await self.set_agent_state_to(AgentState.PAUSED)
-                await self._send_error_to_event_stream(
+                await self._react_to_error(
                     f'Agent reached maximum {limit_type}, task paused. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
-                    f'{TRAFFIC_CONTROL_REMINDER}'
+                    f'{TRAFFIC_CONTROL_REMINDER}',
+                    new_state=AgentState.PAUSED,
                 )
             stop_step = True
         return stop_step

From d8becc23e39ea2a718858fae6bd312b12e5a873d Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:52:00 -0400
Subject: [PATCH 07/31] remove unnecessary return

---
 openhands/controller/agent_controller.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 9550286c316..22e466a8800 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -35,7 +35,6 @@
 from openhands.events.observation import (
     AgentDelegateObservation,
     AgentStateChangedObservation,
-    CmdOutputObservation,
     ErrorObservation,
     FatalErrorObservation,
     Observation,
@@ -245,9 +244,7 @@ async def _handle_observation(self, observation: Observation):
                 await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
             return
 
-        if isinstance(observation, CmdOutputObservation):
-            return
-        elif isinstance(observation, AgentDelegateObservation):
+        if isinstance(observation, AgentDelegateObservation):
             self.state.history.on_event(observation)
         elif isinstance(observation, ErrorObservation):
             if self.state.agent_state == AgentState.ERROR:

From d9811b00a6f941ccf2b9c29007ba1da56b1d541a Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:53:02 -0400
Subject: [PATCH 08/31] stop dropping events while pending_action is set

---
 openhands/controller/agent_controller.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 22e466a8800..ab1e7b84caf 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -216,15 +216,6 @@ async def _handle_observation(self, observation: Observation):
         Args:
             observation (observation): The observation to handle.
         """
-        if (
-            self._pending_action
-            and hasattr(self._pending_action, 'confirmation_state')
-            and self._pending_action.confirmation_state
-            == ActionConfirmationStatus.AWAITING_CONFIRMATION
-        ):
-            return
-
-        # Make sure we print the observation in the same way as the LLM sees it
         observation_to_print = copy.deepcopy(observation)
         if len(observation_to_print.content) > self.agent.llm.config.max_message_chars:
             observation_to_print.content = truncate_content(
@@ -232,7 +223,6 @@ async def _handle_observation(self, observation: Observation):
             )
         logger.info(observation_to_print, extra={'msg_type': 'OBSERVATION'})
 
-        # Merge with the metrics from the LLM - it will to synced to the controller's local metrics in update_state_after_step()
         if observation.llm_metrics is not None:
             self.agent.llm.metrics.merge(observation.llm_metrics)
 

From 47a95f70fa7317a76de7810c5df5b90aac66acac Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Fri, 25 Oct 2024 23:58:47 -0400
Subject: [PATCH 09/31] remove FatalErrorObservation

---
 openhands/controller/agent_controller.py      |  6 ++---
 openhands/controller/state/state.py           |  6 ++---
 openhands/events/observation/__init__.py      |  3 +--
 openhands/events/observation/error.py         | 15 +----------
 openhands/runtime/action_execution_server.py  |  3 +--
 .../impl/eventstream/eventstream_runtime.py   | 22 +++++++++-------
 .../runtime/impl/remote/remote_runtime.py     | 26 +++++++++++--------
 openhands/runtime/utils/bash.py               |  9 ++++---
 openhands/runtime/utils/edit.py               | 11 ++++----
 9 files changed, 46 insertions(+), 55 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index ab1e7b84caf..59f6417c40c 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -36,7 +36,6 @@
     AgentDelegateObservation,
     AgentStateChangedObservation,
     ErrorObservation,
-    FatalErrorObservation,
     Observation,
 )
 from openhands.events.serialization.event import truncate_content
@@ -237,11 +236,10 @@ async def _handle_observation(self, observation: Observation):
         if isinstance(observation, AgentDelegateObservation):
             self.state.history.on_event(observation)
         elif isinstance(observation, ErrorObservation):
+            if observation.fatal:
+                await self.set_agent_state_to(AgentState.ERROR)
             if self.state.agent_state == AgentState.ERROR:
                 self.state.metrics.merge(self.state.local_metrics)
-        elif isinstance(observation, FatalErrorObservation):
-            await self.set_agent_state_to(AgentState.ERROR)
-            self.state.metrics.merge(self.state.local_metrics)
 
     async def _handle_message_action(self, action: MessageAction):
         """Handles message actions from the event stream.
diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py
index d204c9609ec..2ef4ac20350 100644
--- a/openhands/controller/state/state.py
+++ b/openhands/controller/state/state.py
@@ -11,7 +11,7 @@
     MessageAction,
 )
 from openhands.events.action.agent import AgentFinishAction
-from openhands.events.observation import ErrorObservation, FatalErrorObservation
+from openhands.events.observation import ErrorObservation
 from openhands.llm.metrics import Metrics
 from openhands.memory.history import ShortTermHistory
 from openhands.storage.files import FileStore
@@ -156,9 +156,7 @@ def __setstate__(self, state):
 
     def get_last_error(self) -> str:
         for event in self.history.get_events(reverse=True):
-            if isinstance(event, ErrorObservation) or isinstance(
-                event, FatalErrorObservation
-            ):
+            if isinstance(event, ErrorObservation):
                 return event.content
         return ''
 
diff --git a/openhands/events/observation/__init__.py b/openhands/events/observation/__init__.py
index a0fad86dfb4..28525b09aab 100644
--- a/openhands/events/observation/__init__.py
+++ b/openhands/events/observation/__init__.py
@@ -6,7 +6,7 @@
 )
 from openhands.events.observation.delegate import AgentDelegateObservation
 from openhands.events.observation.empty import NullObservation
-from openhands.events.observation.error import ErrorObservation, FatalErrorObservation
+from openhands.events.observation.error import ErrorObservation
 from openhands.events.observation.files import (
     FileEditObservation,
     FileReadObservation,
@@ -26,7 +26,6 @@
     'FileWriteObservation',
     'FileEditObservation',
     'ErrorObservation',
-    'FatalErrorObservation',
     'AgentStateChangedObservation',
     'AgentDelegateObservation',
     'SuccessObservation',
diff --git a/openhands/events/observation/error.py b/openhands/events/observation/error.py
index cfbb291eb0f..d10fa2b8b75 100644
--- a/openhands/events/observation/error.py
+++ b/openhands/events/observation/error.py
@@ -12,6 +12,7 @@ class ErrorObservation(Observation):
     E.g., Linter error after editing a file.
     """
 
+    fatal: bool = False
     observation: str = ObservationType.ERROR
 
     @property
@@ -20,17 +21,3 @@ def message(self) -> str:
 
     def __str__(self) -> str:
         return f'**ErrorObservation**\n{self.content}'
-
-
-@dataclass
-class FatalErrorObservation(Observation):
-    """This data class represents a fatal error encountered by the agent.
-
-    This is the type of error that LLM CANNOT recover from, and the agent controller should stop the execution and report the error to the user.
-    E.g., Remote runtime action execution failure: 503 Server Error: Service Unavailable for url OR 404 Not Found.
-    """
-
-    observation: str = ObservationType.ERROR
-
-    def __str__(self) -> str:
-        return f'**FatalErrorObservation**\n{self.content}'
diff --git a/openhands/runtime/action_execution_server.py b/openhands/runtime/action_execution_server.py
index 2da1372532d..b9f37b6dada 100644
--- a/openhands/runtime/action_execution_server.py
+++ b/openhands/runtime/action_execution_server.py
@@ -37,7 +37,6 @@
 from openhands.events.observation import (
     CmdOutputObservation,
     ErrorObservation,
-    FatalErrorObservation,
     FileReadObservation,
     FileWriteObservation,
     IPythonRunCellObservation,
@@ -167,7 +166,7 @@ async def run_action(self, action) -> Observation:
 
     async def run(
         self, action: CmdRunAction
-    ) -> CmdOutputObservation | FatalErrorObservation:
+    ) -> CmdOutputObservation | ErrorObservation:
         return self.bash_session.run(action)
 
     async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index 1cf40969225..60f9231a723 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -25,7 +25,7 @@
 )
 from openhands.events.action.action import Action
 from openhands.events.observation import (
-    FatalErrorObservation,
+    ErrorObservation,
     NullObservation,
     Observation,
     UserRejectObservation,
@@ -452,10 +452,13 @@ def run_action(self, action: Action) -> Observation:
                 return NullObservation('')
             action_type = action.action  # type: ignore[attr-defined]
             if action_type not in ACTION_TYPE_TO_CLASS:
-                return FatalErrorObservation(f'Action {action_type} does not exist.')
+                return ErrorObservation(
+                    f'Action {action_type} does not exist.', fatal=True
+                )
             if not hasattr(self, action_type):
-                return FatalErrorObservation(
-                    f'Action {action_type} is not supported in the current runtime.'
+                return ErrorObservation(
+                    f'Action {action_type} is not supported in the current runtime.',
+                    fatal=True,
                 )
             if (
                 getattr(action, 'confirmation_state', None)
@@ -486,17 +489,18 @@ def run_action(self, action: Action) -> Observation:
                     logger.debug(f'response: {response}')
                     error_message = response.text
                     logger.error(f'Error from server: {error_message}')
-                    obs = FatalErrorObservation(
-                        f'Action execution failed: {error_message}'
+                    obs = ErrorObservation(
+                        f'Action execution failed: {error_message}', fatal=True
                     )
             except requests.Timeout:
                 logger.error('No response received within the timeout period.')
-                obs = FatalErrorObservation(
-                    f'Action execution timed out after {action.timeout} seconds.'
+                obs = ErrorObservation(
+                    f'Action execution timed out after {action.timeout} seconds.',
+                    fatal=True,
                 )
             except Exception as e:
                 logger.error(f'Error during action execution: {e}')
-                obs = FatalErrorObservation(f'Action execution failed: {str(e)}')
+                obs = ErrorObservation(f'Action execution failed: {str(e)}', fatal=True)
             self._refresh_logs()
             return obs
 
diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index 8c25401af9c..f921d39e432 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -22,7 +22,7 @@
 )
 from openhands.events.action.action import Action
 from openhands.events.observation import (
-    FatalErrorObservation,
+    ErrorObservation,
     NullObservation,
     Observation,
 )
@@ -351,12 +351,14 @@ def run_action(self, action: Action) -> Observation:
                 return NullObservation('')
             action_type = action.action  # type: ignore[attr-defined]
             if action_type not in ACTION_TYPE_TO_CLASS:
-                return FatalErrorObservation(
-                    f'[Runtime (ID={self.runtime_id})] Action {action_type} does not exist.'
+                return ErrorObservation(
+                    f'[Runtime (ID={self.runtime_id})] Action {action_type} does not exist.',
+                    fatal=True,
                 )
             if not hasattr(self, action_type):
-                return FatalErrorObservation(
-                    f'[Runtime (ID={self.runtime_id})] Action {action_type} is not supported in the current runtime.'
+                return ErrorObservation(
+                    f'[Runtime (ID={self.runtime_id})] Action {action_type} is not supported in the current runtime.',
+                    fatal=True,
                 )
 
             assert action.timeout is not None
@@ -379,18 +381,20 @@ def run_action(self, action: Action) -> Observation:
                 else:
                     error_message = response.text
                     logger.error(f'Error from server: {error_message}')
-                    obs = FatalErrorObservation(
-                        f'Action execution failed: {error_message}'
+                    obs = ErrorObservation(
+                        f'Action execution failed: {error_message}', fatal=True
                     )
             except Timeout:
                 logger.error('No response received within the timeout period.')
-                obs = FatalErrorObservation(
-                    f'[Runtime (ID={self.runtime_id})] Action execution timed out'
+                obs = ErrorObservation(
+                    f'[Runtime (ID={self.runtime_id})] Action execution timed out',
+                    fatal=True,
                 )
             except Exception as e:
                 logger.error(f'Error during action execution: {e}')
-                obs = FatalErrorObservation(
-                    f'[Runtime (ID={self.runtime_id})] Action execution failed: {str(e)}'
+                obs = ErrorObservation(
+                    f'[Runtime (ID={self.runtime_id})] Action execution failed: {str(e)}',
+                    fatal=True,
                 )
             return obs
 
diff --git a/openhands/runtime/utils/bash.py b/openhands/runtime/utils/bash.py
index fba16787c6d..98e3ad9c09b 100644
--- a/openhands/runtime/utils/bash.py
+++ b/openhands/runtime/utils/bash.py
@@ -9,7 +9,7 @@
 from openhands.events.event import EventSource
 from openhands.events.observation import (
     CmdOutputObservation,
-    FatalErrorObservation,
+    ErrorObservation,
 )
 
 SOFT_TIMEOUT_SECONDS = 5
@@ -275,7 +275,7 @@ def _continue_bash(
                 output += '\r\n' + bash_prompt
         return output, exit_code
 
-    def run(self, action: CmdRunAction) -> CmdOutputObservation | FatalErrorObservation:
+    def run(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation:
         try:
             assert (
                 action.timeout is not None
@@ -329,6 +329,7 @@ def run(self, action: CmdRunAction) -> CmdOutputObservation | FatalErrorObservat
                 interpreter_details=python_interpreter,
             )
         except UnicodeDecodeError as e:
-            return FatalErrorObservation(
-                f'Runtime bash execution failed: Command output could not be decoded as utf-8. {str(e)}'
+            return ErrorObservation(
+                f'Runtime bash execution failed: Command output could not be decoded as utf-8. {str(e)}',
+                fatal=True,
             )
diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py
index 4ed5c0edafc..39b5cbbce65 100644
--- a/openhands/runtime/utils/edit.py
+++ b/openhands/runtime/utils/edit.py
@@ -13,7 +13,6 @@
 )
 from openhands.events.observation import (
     ErrorObservation,
-    FatalErrorObservation,
     FileEditObservation,
     FileReadObservation,
     FileWriteObservation,
@@ -214,8 +213,9 @@ def edit(self, action: FileEditAction) -> Observation:
             if isinstance(obs, ErrorObservation):
                 return obs
             if not isinstance(obs, FileWriteObservation):
-                return FatalErrorObservation(
-                    f'Fatal Runtime in editing: Expected FileWriteObservation, got {type(obs)}: {str(obs)}'
+                return ErrorObservation(
+                    f'Fatal Runtime in editing: Expected FileWriteObservation, got {type(obs)}: {str(obs)}',
+                    fatal=True,
                 )
             return FileEditObservation(
                 content=get_diff('', action.content, action.path),
@@ -225,8 +225,9 @@ def edit(self, action: FileEditAction) -> Observation:
                 new_content=action.content,
             )
         if not isinstance(obs, FileReadObservation):
-            return FatalErrorObservation(
-                f'Fatal Runtime in editing: Expected FileReadObservation, got {type(obs)}: {str(obs)}'
+            return ErrorObservation(
+                f'Fatal Runtime in editing: Expected FileReadObservation, got {type(obs)}: {str(obs)}',
+                fatal=True,
             )
 
         original_file_content = obs.content

From d46b71272d3650792ec3e975e81735b8e10977cc Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Sat, 26 Oct 2024 00:04:49 -0400
Subject: [PATCH 10/31] refactor react_to_error a bit more

---
 openhands/controller/agent_controller.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 59f6417c40c..087b029aa8f 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -134,16 +134,13 @@ async def update_state_after_step(self):
     async def _react_to_error(
         self,
         message: str,
-        exception: Exception | None = None,
         new_state: AgentState | None = None,
     ):
         if new_state is not None:
+            # it's important to set the state before adding the error event, so that metrics sync properly
             await self.set_agent_state_to(new_state)
-        detail = str(exception) if exception is not None else ''
-        if exception is not None and isinstance(exception, litellm.AuthenticationError):
-            detail += '\nPlease check your credentials. Is your API key correct?'
         await self.event_stream.async_add_event(
-            ErrorObservation(f'{message}:{detail}'), EventSource.AGENT
+            ErrorObservation(message), EventSource.AGENT
         )
 
     async def start_step_loop(self):
@@ -160,9 +157,13 @@ async def start_step_loop(self):
                 traceback.print_exc()
                 logger.error(f'Error while running the agent: {e}')
                 logger.error(traceback.format_exc())
+                detail = str(e)
+                if isinstance(e, litellm.AuthenticationError):
+                    detail += (
+                        '\nPlease check your credentials. Is your API key correct?'
+                    )
                 await self._react_to_error(
-                    'There was an unexpected error while running the agent',
-                    exception=e,
+                    detail,
                     new_state=AgentState.ERROR,
                 )
                 break

From f9c283ea9bc084c3a50d2b688953886db288cdaf Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Sat, 26 Oct 2024 00:09:03 -0400
Subject: [PATCH 11/31] fix comments

---
 openhands/controller/agent_controller.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 087b029aa8f..a51620f1d2c 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -425,9 +425,9 @@ async def _step(self) -> None:
             if action is None:
                 raise LLMNoActionError('No action was returned')
         except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e:
-            # report to the user
-            # and send the underlying exception to the LLM for self-correction
-            await self._react_to_error(str(e))
+            await self._react_to_error(
+                str(e), new_state=None
+            )  # don't change state, LLM can correct itself
             return
 
         if action.runnable:
@@ -473,7 +473,9 @@ async def _delegate_step(self):
             self.delegate = None
             self.delegateAction = None
 
-            await self._react_to_error('Delegator agent encountered an error')
+            await self._react_to_error(
+                'Delegator agent encountered an error', new_state=None
+            )
         elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
             logger.info(
                 f'[Agent Controller {self.id}] Delegate agent has finished execution'

From 1c44fa6807ab21c98807f53838ddd82914c8dd8f Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Sat, 26 Oct 2024 00:45:06 -0400
Subject: [PATCH 12/31] fix test

---
 tests/unit/test_is_stuck.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index 4a133075216..d42fc7392e4 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -440,9 +440,9 @@ def test_is_stuck_repeating_action_observation_pattern(
         read_observation_2._cause = read_action_2._id
         event_stream.add_event(read_observation_2, EventSource.USER)
 
-        # one more message to break the pattern
-        message_null_observation = NullObservation(content='')
+        message_action = MessageAction(content='Come on', wait_for_response=False)
         event_stream.add_event(message_action, EventSource.USER)
+        message_null_observation = NullObservation(content='')
         event_stream.add_event(message_null_observation, EventSource.USER)
 
         cmd_action_3 = CmdRunAction(command='ls')

From 3f67ed38d0d059da0348011733af34a72b2b8b3a Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Sat, 26 Oct 2024 01:01:30 -0400
Subject: [PATCH 13/31] fix test

---
 tests/unit/test_agent_controller.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index fbaa225c907..1aee80666c6 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -87,7 +87,7 @@ async def test_on_event_change_agent_state_action(mock_agent, mock_event_stream)
 
 
 @pytest.mark.asyncio
-async def test_report_error(mock_agent, mock_event_stream):
+async def test__react_to_error(mock_agent, mock_event_stream):
     controller = AgentController(
         agent=mock_agent,
         event_stream=mock_event_stream,
@@ -97,7 +97,7 @@ async def test_report_error(mock_agent, mock_event_stream):
         headless_mode=True,
     )
     error_message = 'Test error'
-    await controller.report_error(error_message)
+    await controller._react_to_error(error_message)
     assert controller.state.last_error == error_message
     controller.event_stream.add_event.assert_called_once()
     await controller.close()
@@ -114,12 +114,12 @@ async def test_step_with_exception(mock_agent, mock_event_stream):
         headless_mode=True,
     )
     controller.state.agent_state = AgentState.RUNNING
-    controller.report_error = AsyncMock()
+    controller._react_to_error = AsyncMock()
     controller.agent.step.side_effect = LLMMalformedActionError('Malformed action')
     await controller._step()
 
-    # Verify that report_error was called with the correct error message
-    controller.report_error.assert_called_once_with('Malformed action')
+    # Verify that _react_to_error was called with the correct error message
+    controller._react_to_error.assert_called_once_with('Malformed action')
     await controller.close()
 
 

From 6aff3818c60920d5ffa479b115088f3cceed25ba Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sun, 27 Oct 2024 04:59:55 +0100
Subject: [PATCH 14/31] Update openhands/controller/state/state.py

Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
---
 openhands/controller/state/state.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py
index 2ef4ac20350..0a84d821a6d 100644
--- a/openhands/controller/state/state.py
+++ b/openhands/controller/state/state.py
@@ -157,7 +157,7 @@ def __setstate__(self, state):
     def get_last_error(self) -> str:
         for event in self.history.get_events(reverse=True):
             if isinstance(event, ErrorObservation):
-                return event.content
+                return event.content if not event.fatal else 'There was a fatal error during agent execution: ' + event.content
         return ''
 
     def get_current_user_intent(self):

From b278d45f0829125c5d1e608c8bb6f15b4a694d1b Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Tue, 29 Oct 2024 11:22:18 +0100
Subject: [PATCH 15/31] more invisible conflicts

---
 tests/unit/test_agent_controller.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py
index 6d0fe07a748..e6de80d274b 100644
--- a/tests/unit/test_agent_controller.py
+++ b/tests/unit/test_agent_controller.py
@@ -14,7 +14,6 @@
 from openhands.events.action import ChangeAgentStateAction, CmdRunAction, MessageAction
 from openhands.events.observation import (
     ErrorObservation,
-    FatalErrorObservation,
 )
 from openhands.events.serialization import event_to_dict
 from openhands.llm import LLM
@@ -148,7 +147,7 @@ async def test_run_controller_with_fatal_error(mock_agent, mock_event_stream):
     agent.llm.metrics = Metrics()
     agent.llm.config = config.get_llm_config()
 
-    fatal_error_obs = FatalErrorObservation('Fatal error detected')
+    fatal_error_obs = ErrorObservation('Fatal error detected', fatal=True)
     fatal_error_obs._cause = event.id
 
     runtime = MagicMock(spec=Runtime)
@@ -175,8 +174,8 @@ async def on_event(event: Event):
     # in side run_controller (since the while loop + sleep no longer loop)
     assert state.agent_state == AgentState.STOPPED
     assert (
-        state.last_error
-        == 'There was a fatal error during agent execution: **FatalErrorObservation**\nFatal error detected'
+        state.get_last_error()
+        == 'There was a fatal error during agent execution: **ErrorObservation**\nFatal error detected'
     )
     assert len(list(event_stream.get_events())) == 5
 

From d19f998270702147ea795820a31383a082c8adbd Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 13:10:04 -0400
Subject: [PATCH 16/31] remove fatal observations

---
 openhands/controller/agent_controller.py                  | 2 --
 openhands/controller/state/state.py                       | 7 +------
 openhands/events/observation/error.py                     | 1 -
 openhands/runtime/impl/eventstream/eventstream_runtime.py | 8 +++-----
 openhands/runtime/impl/remote/remote_runtime.py           | 8 +-------
 openhands/runtime/utils/bash.py                           | 1 -
 openhands/runtime/utils/edit.py                           | 6 ++----
 7 files changed, 7 insertions(+), 26 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index b6991c9ea92..078d145e78e 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -246,8 +246,6 @@ async def _handle_observation(self, observation: Observation):
         if isinstance(observation, AgentDelegateObservation):
             self.state.history.on_event(observation)
         elif isinstance(observation, ErrorObservation):
-            if observation.fatal:
-                await self.set_agent_state_to(AgentState.ERROR)
             if self.state.agent_state == AgentState.ERROR:
                 self.state.metrics.merge(self.state.local_metrics)
 
diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py
index 50ba265c6e3..db3b3638a8c 100644
--- a/openhands/controller/state/state.py
+++ b/openhands/controller/state/state.py
@@ -157,12 +157,7 @@ def __setstate__(self, state):
     def get_last_error(self) -> str:
         for event in self.history.get_events(reverse=True):
             if isinstance(event, ErrorObservation):
-                return (
-                    event.content
-                    if not event.fatal
-                    else 'There was a fatal error during agent execution: '
-                    + event.content
-                )
+                return event.content
         return ''
 
     def get_current_user_intent(self):
diff --git a/openhands/events/observation/error.py b/openhands/events/observation/error.py
index d10fa2b8b75..c6139e86c0b 100644
--- a/openhands/events/observation/error.py
+++ b/openhands/events/observation/error.py
@@ -12,7 +12,6 @@ class ErrorObservation(Observation):
     E.g., Linter error after editing a file.
     """
 
-    fatal: bool = False
     observation: str = ObservationType.ERROR
 
     @property
diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index be4c415ec05..aac4c9e5770 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -466,12 +466,11 @@ def run_action(self, action: Action) -> Observation:
             action_type = action.action  # type: ignore[attr-defined]
             if action_type not in ACTION_TYPE_TO_CLASS:
                 return ErrorObservation(
-                    f'Action {action_type} does not exist.', fatal=True
+                    f'Action {action_type} does not exist.',
                 )
             if not hasattr(self, action_type):
                 return ErrorObservation(
                     f'Action {action_type} is not supported in the current runtime.',
-                    fatal=True,
                 )
             if (
                 getattr(action, 'confirmation_state', None)
@@ -503,17 +502,16 @@ def run_action(self, action: Action) -> Observation:
                     error_message = response.text
                     self.log('error', f'Error from server: {error_message}')
                     obs = ErrorObservation(
-                        f'Action execution failed: {error_message}', fatal=True
+                        f'Action execution failed: {error_message}',
                     )
             except requests.Timeout:
                 self.log('error', 'No response received within the timeout period.')
                 obs = ErrorObservation(
                     f'Action execution timed out after {action.timeout} seconds.',
-                    fatal=True,
                 )
             except Exception as e:
                 self.log('error', f'Error during action execution: {e}')
-                obs = ErrorObservation(f'Action execution failed: {str(e)}', fatal=True)
+                obs = ErrorObservation(f'Action execution failed: {str(e)}')
             self._refresh_logs()
             return obs
 
diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index 4d6e243be88..1169f7ee7a6 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -363,12 +363,10 @@ def run_action(self, action: Action) -> Observation:
             if action_type not in ACTION_TYPE_TO_CLASS:
                 return ErrorObservation(
                     f'[Runtime (ID={self.runtime_id})] Action {action_type} does not exist.',
-                    fatal=True,
                 )
             if not hasattr(self, action_type):
                 return ErrorObservation(
                     f'[Runtime (ID={self.runtime_id})] Action {action_type} is not supported in the current runtime.',
-                    fatal=True,
                 )
 
             assert action.timeout is not None
@@ -391,20 +389,16 @@ def run_action(self, action: Action) -> Observation:
                 else:
                     error_message = response.text
                     self.log('error', f'Error from server: {error_message}')
-                    obs = ErrorObservation(
-                        f'Action execution failed: {error_message}', fatal=True
-                    )
+                    obs = ErrorObservation(f'Action execution failed: {error_message}')
             except Timeout:
                 self.log('error', 'No response received within the timeout period.')
                 obs = ErrorObservation(
                     f'[Runtime (ID={self.runtime_id})] Action execution timed out',
-                    fatal=True,
                 )
             except Exception as e:
                 self.log('error', f'Error during action execution: {e}')
                 obs = ErrorObservation(
                     f'[Runtime (ID={self.runtime_id})] Action execution failed: {str(e)}',
-                    fatal=True,
                 )
             return obs
 
diff --git a/openhands/runtime/utils/bash.py b/openhands/runtime/utils/bash.py
index 98e3ad9c09b..a5019315a03 100644
--- a/openhands/runtime/utils/bash.py
+++ b/openhands/runtime/utils/bash.py
@@ -331,5 +331,4 @@ def run(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation:
         except UnicodeDecodeError as e:
             return ErrorObservation(
                 f'Runtime bash execution failed: Command output could not be decoded as utf-8. {str(e)}',
-                fatal=True,
             )
diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py
index 478da2defc4..fe08f87b2e4 100644
--- a/openhands/runtime/utils/edit.py
+++ b/openhands/runtime/utils/edit.py
@@ -214,8 +214,7 @@ def edit(self, action: FileEditAction) -> Observation:
                 return obs
             if not isinstance(obs, FileWriteObservation):
                 return ErrorObservation(
-                    f'Fatal Runtime in editing: Expected FileWriteObservation, got {type(obs)}: {str(obs)}',
-                    fatal=True,
+                    f'Expected FileWriteObservation, got {type(obs)}: {str(obs)}',
                 )
             return FileEditObservation(
                 content=get_diff('', action.content, action.path),
@@ -226,8 +225,7 @@ def edit(self, action: FileEditAction) -> Observation:
             )
         if not isinstance(obs, FileReadObservation):
             return ErrorObservation(
-                f'Fatal Runtime in editing: Expected FileReadObservation, got {type(obs)}: {str(obs)}',
-                fatal=True,
+                f'Expected FileReadObservation, got {type(obs)}: {str(obs)}',
             )
 
         original_file_content = obs.content

From 4745cb6f135f1461407c4dcfd760f7570166a2a2 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 13:15:26 -0400
Subject: [PATCH 17/31] more refactoring

---
 openhands/controller/agent_controller.py | 38 ++++++++++--------------
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 078d145e78e..8961f5a4b4e 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -3,8 +3,6 @@
 import traceback
 from typing import Type
 
-import litellm
-
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State, TrafficControlState
 from openhands.controller.stuck import StuckDetector
@@ -140,17 +138,15 @@ async def update_state_after_step(self):
         # update metrics especially for cost. Use deepcopy to avoid it being modified by agent.reset()
         self.state.local_metrics = copy.deepcopy(self.agent.llm.metrics)
 
-    async def _react_to_error(
+    async def _react_to_exception(
         self,
-        message: str,
+        e: Exception,
         new_state: AgentState | None = None,
     ):
         if new_state is not None:
             # it's important to set the state before adding the error event, so that metrics sync properly
             await self.set_agent_state_to(new_state)
-        await self.event_stream.async_add_event(
-            ErrorObservation(message), EventSource.AGENT
-        )
+        raise e
 
     async def start_step_loop(self):
         """The main loop for the agent's step-by-step execution."""
@@ -165,14 +161,8 @@ async def start_step_loop(self):
             except Exception as e:
                 traceback.print_exc()
                 self.log('error', f'Error while running the agent: {e}')
-                self.log('error', traceback.format_exc())
-                detail = str(e)
-                if isinstance(e, litellm.AuthenticationError):
-                    detail += (
-                        '\nPlease check your credentials. Is your API key correct?'
-                    )
-                await self._react_to_error(
-                    detail,
+                await self._react_to_exception(
+                    exception=e,
                     new_state=AgentState.ERROR,
                 )
                 break
@@ -437,9 +427,13 @@ async def _step(self) -> None:
             if action is None:
                 raise LLMNoActionError('No action was returned')
         except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e:
-            await self._react_to_error(
-                str(e), new_state=None
-            )  # don't change state, LLM can correct itself
+            await self.event_stream.async_add_event(
+                ErrorObservation(
+                    content=str(e),
+                    cause=action.id,
+                    agent_state=self.get_agent_state(),
+                )
+            )
             return
 
         if action.runnable:
@@ -465,7 +459,7 @@ async def _step(self) -> None:
         self.log('debug', str(action), extra={'msg_type': 'ACTION'})
 
         if self._is_stuck():
-            await self._react_to_error(
+            await self._react_to_exception(
                 'Agent got stuck in a loop', new_state=AgentState.ERROR
             )
 
@@ -486,7 +480,7 @@ async def _delegate_step(self):
             self.delegate = None
             self.delegateAction = None
 
-            await self._react_to_error(
+            await self._react_to_exception(
                 'Delegator agent encountered an error', new_state=None
             )
         elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
@@ -537,13 +531,13 @@ async def _handle_traffic_control(
         else:
             self.state.traffic_control_state = TrafficControlState.THROTTLING
             if self.headless_mode:
-                await self._react_to_error(
+                await self._react_to_exception(
                     f'Agent reached maximum {limit_type} in headless mode, task stopped. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}',
                     new_state=AgentState.ERROR,
                 )
             else:
-                await self._react_to_error(
+                await self._react_to_exception(
                     f'Agent reached maximum {limit_type}, task paused. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
                     f'{TRAFFIC_CONTROL_REMINDER}',

From d7fe86d4f31f6d737192e795ab664030b6df116d Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 13:30:48 -0400
Subject: [PATCH 18/31] new react_to_exception

---
 openhands/controller/agent_controller.py | 35 +++++++++++-------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 8961f5a4b4e..4cedc1159c9 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -141,7 +141,7 @@ async def update_state_after_step(self):
     async def _react_to_exception(
         self,
         e: Exception,
-        new_state: AgentState | None = None,
+        new_state: AgentState = AgentState.ERROR,
     ):
         if new_state is not None:
             # it's important to set the state before adding the error event, so that metrics sync properly
@@ -161,10 +161,7 @@ async def start_step_loop(self):
             except Exception as e:
                 traceback.print_exc()
                 self.log('error', f'Error while running the agent: {e}')
-                await self._react_to_exception(
-                    exception=e,
-                    new_state=AgentState.ERROR,
-                )
+                await self._react_to_exception(e)
                 break
 
             await asyncio.sleep(0.1)
@@ -430,9 +427,8 @@ async def _step(self) -> None:
             await self.event_stream.async_add_event(
                 ErrorObservation(
                     content=str(e),
-                    cause=action.id,
-                    agent_state=self.get_agent_state(),
-                )
+                ),
+                EventSource.AGENT,
             )
             return
 
@@ -459,9 +455,7 @@ async def _step(self) -> None:
         self.log('debug', str(action), extra={'msg_type': 'ACTION'})
 
         if self._is_stuck():
-            await self._react_to_exception(
-                'Agent got stuck in a loop', new_state=AgentState.ERROR
-            )
+            await self._react_to_exception(RuntimeError('Agent got stuck in a loop'))
 
     async def _delegate_step(self):
         """Executes a single step of the delegate agent."""
@@ -480,8 +474,9 @@ async def _delegate_step(self):
             self.delegate = None
             self.delegateAction = None
 
-            await self._react_to_exception(
-                'Delegator agent encountered an error', new_state=None
+            await self.event_stream.async_add_event(
+                ErrorObservation('Delegate agent encountered an error'),
+                EventSource.AGENT,
             )
         elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
             self.log('debug', 'Delegate agent has finished execution')
@@ -531,16 +526,18 @@ async def _handle_traffic_control(
         else:
             self.state.traffic_control_state = TrafficControlState.THROTTLING
             if self.headless_mode:
-                await self._react_to_exception(
-                    f'Agent reached maximum {limit_type} in headless mode, task stopped. '
-                    f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}',
-                    new_state=AgentState.ERROR,
+                e = RuntimeError(
+                    f'Agent reached maximum {limit_type} in headless mode. '
+                    f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}'
                 )
+                await self._react_to_exception(e)
             else:
-                await self._react_to_exception(
+                e = RuntimeError(
                     f'Agent reached maximum {limit_type}, task paused. '
                     f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
-                    f'{TRAFFIC_CONTROL_REMINDER}',
+                )
+                await self._react_to_exception(
+                    e,
                     new_state=AgentState.PAUSED,
                 )
             stop_step = True

From d240775fb0999cb28bce2d70f293aa205ba23d35 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 14:00:33 -0400
Subject: [PATCH 19/31] change up status message plumbing

---
 frontend/src/services/actions.ts              | 10 ++++++----
 frontend/src/types/Message.tsx                | 10 ++++------
 openhands/controller/agent_controller.py      | 12 +++++++++--
 openhands/runtime/base.py                     | 19 ++++++++++++++++--
 openhands/runtime/impl/e2b/e2b_runtime.py     |  4 ++--
 .../impl/eventstream/eventstream_runtime.py   | 15 +++++---------
 openhands/runtime/impl/modal/modal_runtime.py |  6 +++---
 .../runtime/impl/remote/remote_runtime.py     |  9 ++-------
 openhands/server/session/agent_session.py     | 18 ++++++++---------
 openhands/server/session/session.py           | 20 +++++++++++++------
 10 files changed, 72 insertions(+), 51 deletions(-)

diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts
index 46b6aad8513..9d215dcb146 100644
--- a/frontend/src/services/actions.ts
+++ b/frontend/src/services/actions.ts
@@ -119,11 +119,11 @@ export function handleActionMessage(message: ActionMessage) {
 }
 
 export function handleStatusMessage(message: StatusMessage) {
-  const msg = message.status == null ? "" : message.status.trim();
+  const msg_id = message.id;
   store.dispatch(
     setCurStatusMessage({
       ...message,
-      status: msg,
+      status: msg_id,
     }),
   );
 }
@@ -139,9 +139,11 @@ export function handleAssistantMessage(data: string | SocketMessage) {
 
   if ("action" in socketMessage) {
     handleActionMessage(socketMessage);
-  } else if ("status" in socketMessage) {
+  } else if ("observation" in socketMessage) {
+    handleObservationMessage(socketMessage);
+  } else if ("status_update" in socketMessage) {
     handleStatusMessage(socketMessage);
   } else {
-    handleObservationMessage(socketMessage);
+    console.error("Unknown message type", socketMessage);
   }
 }
diff --git a/frontend/src/types/Message.tsx b/frontend/src/types/Message.tsx
index d4d365d5904..85b1d970641 100644
--- a/frontend/src/types/Message.tsx
+++ b/frontend/src/types/Message.tsx
@@ -33,10 +33,8 @@ export interface ObservationMessage {
 }
 
 export interface StatusMessage {
-  // TODO not implemented yet
-  // Whether the status is an error, default is false
-  is_error: boolean;
-
-  // A status message to display to the user
-  status: string;
+  status_update: true;
+  type: string;
+  id: string;
+  message: string;
 }
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 4cedc1159c9..4e430c05ee9 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -1,7 +1,9 @@
 import asyncio
 import copy
 import traceback
-from typing import Type
+from typing import Callable, Optional, Type
+
+import litellm
 
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State, TrafficControlState
@@ -73,6 +75,7 @@ def __init__(
         initial_state: State | None = None,
         is_delegate: bool = False,
         headless_mode: bool = True,
+        status_callback: Optional[Callable] = None,
     ):
         """Initializes a new instance of the AgentController class.
 
@@ -115,6 +118,7 @@ def __init__(
 
         # stuck helper
         self._stuck_detector = StuckDetector(self.state)
+        self._status_callback = status_callback
 
     async def close(self):
         """Closes the agent controller, canceling any ongoing tasks and unsubscribing from the event stream."""
@@ -146,7 +150,11 @@ async def _react_to_exception(
         if new_state is not None:
             # it's important to set the state before adding the error event, so that metrics sync properly
             await self.set_agent_state_to(new_state)
-        raise e
+        if self._status_callback is not None:
+            err_id = ''
+            if isinstance(e, litellm.AuthenticationError):
+                err_id = 'llm_authentication_error'
+            await self._status_callback('error', err_id, str(e))
 
     async def start_step_loop(self):
         """The main loop for the agent's step-by-step execution."""
diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
index 474ba741a47..1b59376924a 100644
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -31,6 +31,14 @@
 from openhands.runtime.utils.edit import FileEditRuntimeMixin
 from openhands.utils.async_utils import call_sync_from_async
 
+STATUS_MESSAGES = {
+    'STATUS$STARTING_RUNTIME': 'Starting runtime...',
+    'STATUS$STARTING_CONTAINER': 'Starting container...',
+    'STATUS$PREPARING_CONTAINER': 'Preparing container...',
+    'STATUS$CONTAINER_STARTED': 'Container started.',
+    'STATUS$WAITING_FOR_CLIENT': 'Waiting for client...',
+}
+
 
 def _default_env_vars(sandbox_config: SandboxConfig) -> dict[str, str]:
     ret = {}
@@ -62,14 +70,14 @@ def __init__(
         sid: str = 'default',
         plugins: list[PluginRequirement] | None = None,
         env_vars: dict[str, str] | None = None,
-        status_message_callback: Callable | None = None,
+        status_callback: Callable | None = None,
         attach_to_existing: bool = False,
     ):
         self.sid = sid
         self.event_stream = event_stream
         self.event_stream.subscribe(EventStreamSubscriber.RUNTIME, self.on_event)
         self.plugins = plugins if plugins is not None and len(plugins) > 0 else []
-        self.status_message_callback = status_message_callback
+        self.status_callback = status_callback
         self.attach_to_existing = attach_to_existing
 
         self.config = copy.deepcopy(config)
@@ -97,6 +105,13 @@ def log(self, level: str, message: str) -> None:
         message = f'[runtime {self.sid}] {message}'
         getattr(logger, level)(message)
 
+    def send_status_message(self, message_id: str):
+        """Sends a status message if the callback function was provided."""
+        print('SEND STATUS', self.status_callback)
+        if self.status_callback:
+            msg = STATUS_MESSAGES.get(message_id, '')
+            self.status_callback('status', message_id, msg)
+
     # ====================================================================
 
     def add_env_vars(self, env_vars: dict[str, str]) -> None:
diff --git a/openhands/runtime/impl/e2b/e2b_runtime.py b/openhands/runtime/impl/e2b/e2b_runtime.py
index b5233574f0b..7c9c297f424 100644
--- a/openhands/runtime/impl/e2b/e2b_runtime.py
+++ b/openhands/runtime/impl/e2b/e2b_runtime.py
@@ -27,14 +27,14 @@ def __init__(
         sid: str = 'default',
         plugins: list[PluginRequirement] | None = None,
         sandbox: E2BSandbox | None = None,
-        status_message_callback: Optional[Callable] = None,
+        status_callback: Optional[Callable] = None,
     ):
         super().__init__(
             config,
             event_stream,
             sid,
             plugins,
-            status_message_callback=status_message_callback,
+            status_callback=status_callback,
         )
         if sandbox is None:
             self.sandbox = E2BSandbox()
diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
index aac4c9e5770..17814cf67ff 100644
--- a/openhands/runtime/impl/eventstream/eventstream_runtime.py
+++ b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -123,7 +123,7 @@ def init_base_runtime(
         sid: str = 'default',
         plugins: list[PluginRequirement] | None = None,
         env_vars: dict[str, str] | None = None,
-        status_message_callback: Callable | None = None,
+        status_callback: Callable | None = None,
         attach_to_existing: bool = False,
     ):
         super().__init__(
@@ -132,7 +132,7 @@ def init_base_runtime(
             sid,
             plugins,
             env_vars,
-            status_message_callback,
+            status_callback,
             attach_to_existing,
         )
 
@@ -143,7 +143,7 @@ def __init__(
         sid: str = 'default',
         plugins: list[PluginRequirement] | None = None,
         env_vars: dict[str, str] | None = None,
-        status_message_callback: Callable | None = None,
+        status_callback: Callable | None = None,
         attach_to_existing: bool = False,
     ):
         self.config = config
@@ -151,7 +151,7 @@ def __init__(
         self._container_port = 30001  # initial dummy value
         self.api_url = f'{self.config.sandbox.local_runtime_url}:{self._container_port}'
         self.session = requests.Session()
-        self.status_message_callback = status_message_callback
+        self.status_callback = status_callback
 
         self.docker_client: docker.DockerClient = self._init_docker_client()
         self.base_container_image = self.config.sandbox.base_container_image
@@ -181,7 +181,7 @@ def __init__(
             sid,
             plugins,
             env_vars,
-            status_message_callback,
+            status_callback,
             attach_to_existing,
         )
 
@@ -664,8 +664,3 @@ def _find_available_port(self, max_attempts=5):
                 return port
         # If no port is found after max_attempts, return the last tried port
         return port
-
-    def send_status_message(self, message: str):
-        """Sends a status message if the callback function was provided."""
-        if self.status_message_callback:
-            self.status_message_callback(message)
diff --git a/openhands/runtime/impl/modal/modal_runtime.py b/openhands/runtime/impl/modal/modal_runtime.py
index 3a484c43e69..0e598a437f4 100644
--- a/openhands/runtime/impl/modal/modal_runtime.py
+++ b/openhands/runtime/impl/modal/modal_runtime.py
@@ -75,7 +75,7 @@ def __init__(
         sid: str = 'default',
         plugins: list[PluginRequirement] | None = None,
         env_vars: dict[str, str] | None = None,
-        status_message_callback: Callable | None = None,
+        status_callback: Callable | None = None,
         attach_to_existing: bool = False,
     ):
         assert config.modal_api_token_id, 'Modal API token id is required'
@@ -102,7 +102,7 @@ def __init__(
         self.container_port = 3000
 
         self.session = requests.Session()
-        self.status_message_callback = status_message_callback
+        self.status_callback = status_callback
         self.base_container_image_id = self.config.sandbox.base_container_image
         self.runtime_container_image_id = self.config.sandbox.runtime_container_image
         self.action_semaphore = threading.Semaphore(1)  # Ensure one action at a time
@@ -122,7 +122,7 @@ def __init__(
             sid,
             plugins,
             env_vars,
-            status_message_callback,
+            status_callback,
             attach_to_existing,
         )
 
diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py
index 1169f7ee7a6..a628db49654 100644
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@@ -51,7 +51,7 @@ def __init__(
         sid: str = 'default',
         plugins: list[PluginRequirement] | None = None,
         env_vars: dict[str, str] | None = None,
-        status_message_callback: Optional[Callable] = None,
+        status_callback: Optional[Callable] = None,
         attach_to_existing: bool = False,
     ):
         super().__init__(
@@ -60,7 +60,7 @@ def __init__(
             sid,
             plugins,
             env_vars,
-            status_message_callback,
+            status_callback,
             attach_to_existing,
         )
 
@@ -539,8 +539,3 @@ def copy_from(self, path: str) -> bytes:
             raise RuntimeError(
                 f'[Runtime (ID={self.runtime_id})] Copy operation failed: {str(e)}'
             )
-
-    def send_status_message(self, message: str):
-        """Sends a status message if the callback function was provided."""
-        if self.status_message_callback:
-            self.status_message_callback(message)
diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py
index 41ee9688916..b7fd08ba823 100644
--- a/openhands/server/session/agent_session.py
+++ b/openhands/server/session/agent_session.py
@@ -32,7 +32,12 @@ class AgentSession:
     _closed: bool = False
     loop: asyncio.AbstractEventLoop | None = None
 
-    def __init__(self, sid: str, file_store: FileStore):
+    def __init__(
+        self,
+        sid: str,
+        file_store: FileStore,
+        status_callback: Optional[Callable] = None,
+    ):
         """Initializes a new instance of the Session class
 
         Parameters:
@@ -43,6 +48,7 @@ def __init__(self, sid: str, file_store: FileStore):
         self.sid = sid
         self.event_stream = EventStream(sid, file_store)
         self.file_store = file_store
+        self._status_callback = status_callback
 
     async def start(
         self,
@@ -53,7 +59,6 @@ async def start(
         max_budget_per_task: float | None = None,
         agent_to_llm_config: dict[str, LLMConfig] | None = None,
         agent_configs: dict[str, AgentConfig] | None = None,
-        status_message_callback: Optional[Callable] = None,
     ):
         """Starts the Agent session
         Parameters:
@@ -80,7 +85,6 @@ async def start(
             max_budget_per_task,
             agent_to_llm_config,
             agent_configs,
-            status_message_callback,
         )
 
     def _start_thread(self, *args):
@@ -99,7 +103,6 @@ async def _start(
         max_budget_per_task: float | None = None,
         agent_to_llm_config: dict[str, LLMConfig] | None = None,
         agent_configs: dict[str, AgentConfig] | None = None,
-        status_message_callback: Optional[Callable] = None,
     ):
         self.loop = asyncio.get_running_loop()
         self._create_security_analyzer(config.security.security_analyzer)
@@ -107,7 +110,6 @@ async def _start(
             runtime_name=runtime_name,
             config=config,
             agent=agent,
-            status_message_callback=status_message_callback,
         )
         self._create_controller(
             agent,
@@ -161,7 +163,6 @@ async def _create_runtime(
         runtime_name: str,
         config: AppConfig,
         agent: Agent,
-        status_message_callback: Optional[Callable] = None,
     ):
         """Creates a runtime instance
 
@@ -181,7 +182,7 @@ async def _create_runtime(
             event_stream=self.event_stream,
             sid=self.sid,
             plugins=agent.sandbox_plugins,
-            status_message_callback=status_message_callback,
+            status_callback=self._status_callback,
         )
 
         try:
@@ -251,9 +252,8 @@ def _create_controller(
             agent_to_llm_config=agent_to_llm_config,
             agent_configs=agent_configs,
             confirmation_mode=confirmation_mode,
-            # AgentSession is designed to communicate with the frontend, so we don't want to
-            # run the agent in headless mode.
             headless_mode=False,
+            status_callback=self._status_callback,
         )
         try:
             agent_state = State.restore_from_session(self.sid, self.file_store)
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index 4e6119a1856..b83f09b2493 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -42,7 +42,9 @@ def __init__(
         self.sid = sid
         self.websocket = ws
         self.last_active_ts = int(time.time())
-        self.agent_session = AgentSession(sid, file_store)
+        self.agent_session = AgentSession(
+            sid, file_store, status_callback=self.queue_status_message
+        )
         self.agent_session.event_stream.subscribe(
             EventStreamSubscriber.SERVER, self.on_event
         )
@@ -116,7 +118,6 @@ async def _initialize_agent(self, data: dict):
                 max_budget_per_task=self.config.max_budget_per_task,
                 agent_to_llm_config=self.config.get_agent_to_llm_config_map(),
                 agent_configs=self.config.get_agent_configs(),
-                status_message_callback=self.queue_status_message,
             )
         except Exception as e:
             logger.exception(f'Error creating controller: {e}')
@@ -177,6 +178,7 @@ async def send(self, data: dict[str, object]) -> bool:
         try:
             if self.websocket is None or not self.is_alive:
                 return False
+            print('SEND JSON', data)
             await self.websocket.send_json(data)
             await asyncio.sleep(0.001)  # This flushes the data to the client
             self.last_active_ts = int(time.time())
@@ -196,9 +198,12 @@ async def send_message(self, message: str) -> bool:
         """Sends a message to the client."""
         return await self.send({'message': message})
 
-    async def send_status_message(self, message: str) -> bool:
+    async def send_status_message(self, msg_type: str, id: str, message: str) -> bool:
         """Sends a status message to the client."""
-        return await self.send({'status': message})
+        print('SEND STATUS BACK TO CLIENT')
+        return await self.send(
+            {'status_update': True, 'type': msg_type, 'id': id, 'message': message}
+        )
 
     def update_connection(self, ws: WebSocket):
         self.websocket = ws
@@ -212,7 +217,10 @@ def load_from_data(self, data: dict) -> bool:
         self.is_alive = data.get('is_alive', False)
         return True
 
-    def queue_status_message(self, message: str):
+    def queue_status_message(self, msg_type: str, id: str, message: str):
         """Queues a status message to be sent asynchronously."""
         # Ensure the coroutine runs in the main event loop
-        asyncio.run_coroutine_threadsafe(self.send_status_message(message), self.loop)
+        print('QUEUE STATUS MESSAGE')
+        asyncio.run_coroutine_threadsafe(
+            self.send_status_message(msg_type, id, message), self.loop
+        )

From d9f834e8c2785910d7dcd829a208e77450c8580a Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 14:47:16 -0400
Subject: [PATCH 20/31] logspam

---
 openhands/runtime/base.py           | 1 -
 openhands/server/session/session.py | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py
index 1b59376924a..d3b236131d1 100644
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -107,7 +107,6 @@ def log(self, level: str, message: str) -> None:
 
     def send_status_message(self, message_id: str):
         """Sends a status message if the callback function was provided."""
-        print('SEND STATUS', self.status_callback)
         if self.status_callback:
             msg = STATUS_MESSAGES.get(message_id, '')
             self.status_callback('status', message_id, msg)
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index b83f09b2493..2784cac5f0d 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -178,7 +178,6 @@ async def send(self, data: dict[str, object]) -> bool:
         try:
             if self.websocket is None or not self.is_alive:
                 return False
-            print('SEND JSON', data)
             await self.websocket.send_json(data)
             await asyncio.sleep(0.001)  # This flushes the data to the client
             self.last_active_ts = int(time.time())
@@ -200,7 +199,6 @@ async def send_message(self, message: str) -> bool:
 
     async def send_status_message(self, msg_type: str, id: str, message: str) -> bool:
         """Sends a status message to the client."""
-        print('SEND STATUS BACK TO CLIENT')
         return await self.send(
             {'status_update': True, 'type': msg_type, 'id': id, 'message': message}
         )
@@ -220,7 +218,6 @@ def load_from_data(self, data: dict) -> bool:
     def queue_status_message(self, msg_type: str, id: str, message: str):
         """Queues a status message to be sent asynchronously."""
         # Ensure the coroutine runs in the main event loop
-        print('QUEUE STATUS MESSAGE')
         asyncio.run_coroutine_threadsafe(
             self.send_status_message(msg_type, id, message), self.loop
         )

From 99486f4ae7ac35ca9b326dfa188998b015c9a36a Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 14:52:26 -0400
Subject: [PATCH 21/31] fix await

---
 openhands/controller/agent_controller.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 4e430c05ee9..568877a6ffa 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -154,7 +154,7 @@ async def _react_to_exception(
             err_id = ''
             if isinstance(e, litellm.AuthenticationError):
                 err_id = 'llm_authentication_error'
-            await self._status_callback('error', err_id, str(e))
+            self._status_callback('error', err_id, str(e))
 
     async def start_step_loop(self):
         """The main loop for the agent's step-by-step execution."""

From ed7f4ee34d073b0d3ba2abdd02695d9c77f378c7 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 15:25:30 -0400
Subject: [PATCH 22/31] better error toasts on frontend

---
 frontend/src/components/AgentStatusBar.tsx | 28 +++++++++++++++-------
 frontend/src/services/actions.ts           |  2 --
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx
index c337a838f32..be6eba4448a 100644
--- a/frontend/src/components/AgentStatusBar.tsx
+++ b/frontend/src/components/AgentStatusBar.tsx
@@ -5,6 +5,7 @@ import { I18nKey } from "#/i18n/declaration";
 import { RootState } from "#/store";
 import AgentState from "#/types/AgentState";
 import beep from "#/utils/beep";
+import toast from "react-hot-toast";
 
 enum IndicatorColor {
   BLUE = "bg-blue-500",
@@ -16,7 +17,7 @@ enum IndicatorColor {
 }
 
 function AgentStatusBar() {
-  const { t } = useTranslation();
+  const { t, i18n } = useTranslation();
   const { curAgentState } = useSelector((state: RootState) => state.agent);
   const { curStatusMessage } = useSelector((state: RootState) => state.status);
 
@@ -94,15 +95,26 @@ function AgentStatusBar() {
   const [statusMessage, setStatusMessage] = React.useState<string>("");
 
   React.useEffect(() => {
-    if (curAgentState === AgentState.LOADING) {
-      const trimmedCustomMessage = curStatusMessage.status.trim();
-      if (trimmedCustomMessage) {
-        setStatusMessage(t(trimmedCustomMessage));
-        return;
+    let message = curStatusMessage.message || '';
+    if (curStatusMessage?.id) {
+      const id = curStatusMessage.id.trim();
+      console.log('status message id', id);
+      if (i18n.exists(id)) {
+        console.log('exists');
+        message = t(curStatusMessage.id.trim()) || message;
       }
     }
-    setStatusMessage(AgentStatusMap[curAgentState].message);
-  }, [curAgentState, curStatusMessage.status]);
+    if (curStatusMessage?.type === "error") {
+      console.log('error', message);
+      toast.error(message);
+      return;
+    }
+    if (curAgentState === AgentState.LOADING && message.trim()) {
+      setStatusMessage(message);
+    } else {
+      setStatusMessage(AgentStatusMap[curAgentState].message);
+    }
+  }, [curAgentState, curStatusMessage.id]);
 
   return (
     <div className="flex flex-col items-center">
diff --git a/frontend/src/services/actions.ts b/frontend/src/services/actions.ts
index 9d215dcb146..620f4513113 100644
--- a/frontend/src/services/actions.ts
+++ b/frontend/src/services/actions.ts
@@ -119,11 +119,9 @@ export function handleActionMessage(message: ActionMessage) {
 }
 
 export function handleStatusMessage(message: StatusMessage) {
-  const msg_id = message.id;
   store.dispatch(
     setCurStatusMessage({
       ...message,
-      status: msg_id,
     }),
   );
 }

From 2e342b870e2dcbf531010bcd73e4014ef1bdbf5e Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 15:32:51 -0400
Subject: [PATCH 23/31] better error plumbing

---
 frontend/src/components/AgentStatusBar.tsx | 7 ++++++-
 openhands/controller/agent_controller.py   | 3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx
index be6eba4448a..351f4924bba 100644
--- a/frontend/src/components/AgentStatusBar.tsx
+++ b/frontend/src/components/AgentStatusBar.tsx
@@ -95,6 +95,7 @@ function AgentStatusBar() {
   const [statusMessage, setStatusMessage] = React.useState<string>("");
 
   React.useEffect(() => {
+    console.log('cur status message', curStatusMessage);
     let message = curStatusMessage.message || '';
     if (curStatusMessage?.id) {
       const id = curStatusMessage.id.trim();
@@ -114,7 +115,11 @@ function AgentStatusBar() {
     } else {
       setStatusMessage(AgentStatusMap[curAgentState].message);
     }
-  }, [curAgentState, curStatusMessage.id]);
+  }, [curStatusMessage.id]);
+
+  React.useEffect(() => {
+      setStatusMessage(AgentStatusMap[curAgentState].message);
+  }, [curAgentState]);
 
   return (
     <div className="flex flex-col items-center">
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 568877a6ffa..7a02166deb0 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -322,9 +322,10 @@ async def set_agent_state_to(self, new_state: AgentState):
             )
 
         self.state.agent_state = new_state
-        self.event_stream.add_event(
+        await self.event_stream.async_add_event(
             AgentStateChangedObservation('', self.state.agent_state), EventSource.AGENT
         )
+        print('sent state change obs')
 
         if new_state == AgentState.INIT and self.state.resume_state:
             await self.set_agent_state_to(self.state.resume_state)

From 154b838b442fdcbcd839a80d0fb63205d024880f Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 15:33:04 -0400
Subject: [PATCH 24/31] logspam

---
 frontend/src/components/AgentStatusBar.tsx | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx
index 351f4924bba..8aeb567f096 100644
--- a/frontend/src/components/AgentStatusBar.tsx
+++ b/frontend/src/components/AgentStatusBar.tsx
@@ -95,18 +95,14 @@ function AgentStatusBar() {
   const [statusMessage, setStatusMessage] = React.useState<string>("");
 
   React.useEffect(() => {
-    console.log('cur status message', curStatusMessage);
     let message = curStatusMessage.message || '';
     if (curStatusMessage?.id) {
       const id = curStatusMessage.id.trim();
-      console.log('status message id', id);
       if (i18n.exists(id)) {
-        console.log('exists');
         message = t(curStatusMessage.id.trim()) || message;
       }
     }
     if (curStatusMessage?.type === "error") {
-      console.log('error', message);
       toast.error(message);
       return;
     }

From 45ba7cf1139ded62425acea7ccafed3e8888f237 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Tue, 29 Oct 2024 16:36:19 -0400
Subject: [PATCH 25/31] better restarting

---
 openhands/agenthub/codeact_agent/codeact_agent.py |  2 +-
 openhands/controller/agent_controller.py          |  2 +-
 openhands/server/session/agent_session.py         |  1 +
 openhands/server/session/session.py               | 12 ++++++------
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index 997d424c515..9dca9baf235 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -110,7 +110,7 @@ def __init__(
                 codeact_enable_jupyter=self.config.codeact_enable_jupyter,
                 codeact_enable_llm_editor=self.config.codeact_enable_llm_editor,
             )
-            logger.info(
+            logger.debug(
                 f'TOOLS loaded for CodeActAgent: {json.dumps(self.tools, indent=2)}'
             )
             self.system_prompt = codeact_function_calling.SYSTEM_PROMPT
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 7a02166deb0..990cb6deb88 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -170,7 +170,6 @@ async def start_step_loop(self):
                 traceback.print_exc()
                 self.log('error', f'Error while running the agent: {e}')
                 await self._react_to_exception(e)
-                break
 
             await asyncio.sleep(0.1)
 
@@ -256,6 +255,7 @@ async def _handle_message_action(self, action: MessageAction):
                 str(action),
                 extra={'msg_type': 'ACTION', 'event_source': EventSource.USER},
             )
+            print('GOT MESSAGE ACTION, RESTARTING')
             if self.get_agent_state() != AgentState.RUNNING:
                 await self.set_agent_state_to(AgentState.RUNNING)
         elif action.source == EventSource.AGENT and action.wait_for_response:
diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py
index b7fd08ba823..6194f6d3a6c 100644
--- a/openhands/server/session/agent_session.py
+++ b/openhands/server/session/agent_session.py
@@ -128,6 +128,7 @@ async def _start(
 
     async def close(self):
         """Closes the Agent session"""
+        print('CLOSING AGENT SESSION')
 
         if self._closed:
             return
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index 2784cac5f0d..678982651af 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -52,6 +52,7 @@ def __init__(
         self.loop = asyncio.get_event_loop()
 
     async def close(self):
+        print('CLOSE SESSION')
         self.is_alive = False
         await self.agent_session.close()
 
@@ -67,9 +68,11 @@ async def loop_recv(self):
                     continue
                 await self.dispatch(data)
         except WebSocketDisconnect:
+            print('WebSocketDisconnect, closing!')
             await self.close()
             logger.debug('WebSocket disconnected, sid: %s', self.sid)
         except RuntimeError as e:
+            print('RuntimeError, closing!', e)
             await self.close()
             logger.exception('Error in loop_recv: %s', e)
 
@@ -167,12 +170,9 @@ async def dispatch(self, data: dict):
                     )
                     return
         if self.agent_session.loop:
-            asyncio.run_coroutine_threadsafe(
-                self._add_event(event, EventSource.USER), self.agent_session.loop
-            )  # type: ignore
-
-    async def _add_event(self, event, event_source):
-        self.agent_session.event_stream.add_event(event, EventSource.USER)
+            await self.agent_session.event_stream.async_add_event(
+                event, EventSource.USER
+            )
 
     async def send(self, data: dict[str, object]) -> bool:
         try:

From 9c28805718d0c3cfded63764572abece9d5ad154 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Wed, 30 Oct 2024 04:55:20 +0100
Subject: [PATCH 26/31] Update openhands/controller/agent_controller.py

---
 openhands/controller/agent_controller.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 4e430c05ee9..9b268536e71 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -1,7 +1,7 @@
 import asyncio
 import copy
 import traceback
-from typing import Callable, Optional, Type
+from typing import Callable, Type
 
 import litellm
 

From b757831adbf51143e7a2e66589dc963a970c7365 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Wed, 30 Oct 2024 04:55:52 +0100
Subject: [PATCH 27/31] Update openhands/controller/agent_controller.py

---
 openhands/controller/agent_controller.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 9b268536e71..a1e3d98f7b6 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -75,7 +75,7 @@ def __init__(
         initial_state: State | None = None,
         is_delegate: bool = False,
         headless_mode: bool = True,
-        status_callback: Optional[Callable] = None,
+        status_callback: Callable | None = None,
     ):
         """Initializes a new instance of the AgentController class.
 

From 3e446bb47d1e46d13e92a8cc158c9e1c8af0ea8b Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 16:50:30 -0400
Subject: [PATCH 28/31] delint

---
 frontend/src/components/AgentStatusBar.tsx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/frontend/src/components/AgentStatusBar.tsx b/frontend/src/components/AgentStatusBar.tsx
index 8aeb567f096..7de9ae0397e 100644
--- a/frontend/src/components/AgentStatusBar.tsx
+++ b/frontend/src/components/AgentStatusBar.tsx
@@ -1,11 +1,11 @@
 import React, { useEffect } from "react";
 import { useTranslation } from "react-i18next";
 import { useSelector } from "react-redux";
+import toast from "react-hot-toast";
 import { I18nKey } from "#/i18n/declaration";
 import { RootState } from "#/store";
 import AgentState from "#/types/AgentState";
 import beep from "#/utils/beep";
-import toast from "react-hot-toast";
 
 enum IndicatorColor {
   BLUE = "bg-blue-500",
@@ -95,7 +95,7 @@ function AgentStatusBar() {
   const [statusMessage, setStatusMessage] = React.useState<string>("");
 
   React.useEffect(() => {
-    let message = curStatusMessage.message || '';
+    let message = curStatusMessage.message || "";
     if (curStatusMessage?.id) {
       const id = curStatusMessage.id.trim();
       if (i18n.exists(id)) {
@@ -114,7 +114,7 @@ function AgentStatusBar() {
   }, [curStatusMessage.id]);
 
   React.useEffect(() => {
-      setStatusMessage(AgentStatusMap[curAgentState].message);
+    setStatusMessage(AgentStatusMap[curAgentState].message);
   }, [curAgentState]);
 
   return (

From 9289cdbd61547b5c3f19e5a409ac5ab3f69a5b02 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 16:56:42 -0400
Subject: [PATCH 29/31] fix threading

---
 openhands/server/session/agent_session.py | 1 -
 openhands/server/session/session.py       | 7 ++++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py
index 6194f6d3a6c..b7fd08ba823 100644
--- a/openhands/server/session/agent_session.py
+++ b/openhands/server/session/agent_session.py
@@ -128,7 +128,6 @@ async def _start(
 
     async def close(self):
         """Closes the Agent session"""
-        print('CLOSING AGENT SESSION')
 
         if self._closed:
             return
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index 678982651af..a67918805db 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -170,9 +170,10 @@ async def dispatch(self, data: dict):
                     )
                     return
         if self.agent_session.loop:
-            await self.agent_session.event_stream.async_add_event(
-                event, EventSource.USER
-            )
+            asyncio.run_coroutine_threadsafe(
+                self.agent_session.event_stream.add_event(event, EventSource.USER),
+                self.agent_session.loop,
+            )  # type: ignore
 
     async def send(self, data: dict[str, object]) -> bool:
         try:

From 52bbdd8b307e64ad383a7eed018440718f2f45a5 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 17:01:11 -0400
Subject: [PATCH 30/31] fix async

---
 openhands/server/session/session.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index a67918805db..a2ad87d214f 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -171,7 +171,9 @@ async def dispatch(self, data: dict):
                     return
         if self.agent_session.loop:
             asyncio.run_coroutine_threadsafe(
-                self.agent_session.event_stream.add_event(event, EventSource.USER),
+                self.agent_session.event_stream.async_add_event(
+                    event, EventSource.USER
+                ),
                 self.agent_session.loop,
             )  # type: ignore
 

From e78af120808b7f512a492e8605a0098940338d55 Mon Sep 17 00:00:00 2001
From: Robert Brennan <contact@rbren.io>
Date: Wed, 30 Oct 2024 17:05:59 -0400
Subject: [PATCH 31/31] add id for auth errors

---
 frontend/src/i18n/translation.json       | 6 ++++++
 openhands/controller/agent_controller.py | 2 +-
 openhands/server/session/session.py      | 3 ---
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/frontend/src/i18n/translation.json b/frontend/src/i18n/translation.json
index 795c60e051f..3893d9f4f68 100644
--- a/frontend/src/i18n/translation.json
+++ b/frontend/src/i18n/translation.json
@@ -1510,5 +1510,11 @@
     "ar": "في انتظار جاهزية العميل...",
     "fr": "En attente que le client soit prêt...",
     "tr": "İstemcinin hazır olması bekleniyor..."
+  },
+  "STATUS$ERROR_LLM_AUTHENTICATION": {
+    "en": "Error authenticating with the LLM provider. Please check your API key"
+  },
+  "STATUS$ERROR_RUNTIME_DISCONNECTED": {
+    "en": "There was an error while connecting to the runtime. Please refresh the page."
   }
 }
diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index 8ffc278a753..4d572f8c390 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -153,7 +153,7 @@ async def _react_to_exception(
         if self._status_callback is not None:
             err_id = ''
             if isinstance(e, litellm.AuthenticationError):
-                err_id = 'llm_authentication_error'
+                err_id = 'STATUS$ERROR_LLM_AUTHENTICATION'
             self._status_callback('error', err_id, str(e))
 
     async def start_step_loop(self):
diff --git a/openhands/server/session/session.py b/openhands/server/session/session.py
index a2ad87d214f..f5bdc20e0c6 100644
--- a/openhands/server/session/session.py
+++ b/openhands/server/session/session.py
@@ -52,7 +52,6 @@ def __init__(
         self.loop = asyncio.get_event_loop()
 
     async def close(self):
-        print('CLOSE SESSION')
         self.is_alive = False
         await self.agent_session.close()
 
@@ -68,11 +67,9 @@ async def loop_recv(self):
                     continue
                 await self.dispatch(data)
         except WebSocketDisconnect:
-            print('WebSocketDisconnect, closing!')
             await self.close()
             logger.debug('WebSocket disconnected, sid: %s', self.sid)
         except RuntimeError as e:
-            print('RuntimeError, closing!', e)
             await self.close()
             logger.exception('Error in loop_recv: %s', e)