Improving functionality
AmberSahdev committed Feb 27, 2024
1 parent ea50c2b commit df77190
Showing 6 changed files with 19 additions and 67 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -4,7 +4,7 @@
<img src="app/resources/icon.png" align="right" alt="Open Interface Logo" width="120" height="120">
</picture>

-### Have LLMs Operate Your Computer
+### Make LLMs Operate Your Computer for You
#### Complete Tedious Everyday-tasks with One Command

Open Interface can
3 changes: 3 additions & 0 deletions app/app.py
@@ -4,6 +4,9 @@
from ui import UI


+# Imports to make packaging work
+import appdirs

class App:
def __init__(self):
self.core = Core()
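The top-level `import appdirs` above is a common way to make a bundler such as PyInstaller pick up a dependency it would otherwise miss. A sketch of the equivalent hidden-import approach is shown below; this is an assumption for illustration, not something this commit does (the commit in fact deletes pyinstaller-build.py):

```python
# Sketch: declaring the dependency as a hidden import instead of importing it
# at the top of app.py. Assumes PyInstaller is installed; the flags shown are
# standard PyInstaller options.
import PyInstaller.__main__

PyInstaller.__main__.run([
    "app/app.py",
    "--onefile",
    "--hidden-import=appdirs",  # same effect as the `import appdirs` workaround
])
```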
5 changes: 5 additions & 0 deletions app/core.py
@@ -39,6 +39,10 @@ def execute(self, user_request, step_num=0):
try:
instructions = self.llm.get_instructions_for_objective(user_request, step_num)

+if instructions == {}:
+    # Sometimes LLM sends malformed JSON response, in that case retry once more.
+    instructions = self.llm.get_instructions_for_objective(user_request + " Please reply in valid JSON", step_num)

for step in instructions["steps"]:
if self.interrupt_execution:
self.status_queue.put("Interrupted")
@@ -60,4 +64,5 @@ def execute(self, user_request, step_num=0):
return instructions["done"]
else:
# if not done, continue to next phase
+self.status_queue.put("Fetching further instructions based on current state")
return self.execute(user_request, step_num + 1)
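The core.py hunks above add a one-shot retry when the LLM returns malformed JSON (the response parser yields `{}` in that case) and a status update before recursing for the next phase. A minimal sketch of that retry pattern, with the surrounding class simplified away (the `llm` object and method name follow the diff):

```python
def fetch_instructions(llm, user_request: str, step_num: int) -> dict:
    # First attempt; the JSON parser returns {} when the response is malformed.
    instructions = llm.get_instructions_for_objective(user_request, step_num)
    if instructions == {}:
        # Retry once, nudging the model toward valid JSON.
        instructions = llm.get_instructions_for_objective(
            user_request + " Please reply in valid JSON", step_num
        )
    return instructions
```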
10 changes: 8 additions & 2 deletions app/llm.py
@@ -43,7 +43,7 @@ class LLM:
function is the function name to call in the executor.
parameters are the parameters of the above function.
human_readable_justification is what we can use to debug in case program fails somewhere or to explain to user why we're doing what we're doing.
-done is None if user request is not complete, and it's a string when it's complete that either contains the
+done is null if user request is not complete, and it's a string when it's complete that either contains the
information that the user asked for, or just acknowledges completion of the user requested task. This is going
to be communicated to the user if it's present.
@@ -63,7 +63,7 @@ def __init__(self):
if settings_dict['api_key']:
os.environ["OPENAI_API_KEY"] = settings_dict['api_key']

-with open('resources/context.txt', 'r') as file:
+with open('./resources/context.txt', 'r') as file:
self.context = file.read()

if settings_dict['default_browser']:
@@ -121,6 +121,7 @@ def convert_llm_response_to_json(self, llm_response):
llm_response_data = llm_response.choices[0].message.content.strip()

# Our current LLM model does not guarantee a JSON response, hence we manually parse the JSON part of the response
+# Check for updates here - https://platform.openai.com/docs/guides/text-generation/json-mode
start_index = llm_response_data.find('{')
end_index = llm_response_data.rfind('}')

@@ -129,6 +130,11 @@
except Exception as e:
print(f'llm_response_data[start_index:end_index + 1] - {llm_response_data[start_index:end_index + 1]}')
print(f'Error while parsing JSON response - {e}')

+# TODO: Temporary for debugging
+with open("faulty_json_recieved.json", "w") as f:
+    f.write(llm_response_data[start_index:end_index + 1].strip())

json_response = {}

return json_response
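The llm.py hunk documents why the response has to be sliced manually (the model is not guaranteed to emit pure JSON) and, for now, writes any payload that still fails to parse to disk for debugging. A self-contained sketch of that fallback, assuming a plain string input (the debug filename mirrors the diff):

```python
import json

def convert_llm_response_to_json(llm_response_data: str) -> dict:
    # The model may wrap JSON in prose, so slice from the first '{' to the last '}'.
    start_index = llm_response_data.find('{')
    end_index = llm_response_data.rfind('}')
    try:
        return json.loads(llm_response_data[start_index:end_index + 1].strip())
    except Exception as e:
        print(f'Error while parsing JSON response - {e}')
        # Temporary debugging aid, as in the diff: keep the faulty payload on disk.
        with open('faulty_json_recieved.json', 'w') as f:
            f.write(llm_response_data[start_index:end_index + 1].strip())
        return {}
```

The OpenAI JSON mode linked in the new comment (`response_format={"type": "json_object"}` on supported chat models) would make this slicing unnecessary, which is presumably why the comment points there.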
4 changes: 2 additions & 2 deletions app/resources/context.txt
@@ -30,7 +30,7 @@ Expected LLM Response
"function" is the function name to call in the executor.
"parameters" is the parameters of the above function.
"human_readable_justification" is what we can use to debug in case program fails somewhere or to explain to user why we're doing what we're doing.
"done" is None if user request is not complete, and it's a string when it's complete that either contains the information that the user asked for, or just acknowledges completion of the user requested task. This is going to be communicated to the user if it's present. Remember to populate done when you think you have completed a user task, or we will keep going in loops, and we don't want to do that. But also make sure with a screenshot that the job is actually done. This is important.
"done" is null if user request is not complete, and it's a string when it's complete that either contains the information that the user asked for, or just acknowledges completion of the user requested task. This is going to be communicated to the user if it's present. Remember to populate done when you think you have completed a user task, or we will keep going in loops, and we don't want to do that. But also make sure with a screenshot that the job is actually done. This is important.

To control the keyboard and mouse of my computer, use the pyautogui library.
Keyboard Documentation: [Text from: https://raw.githubusercontent.com/asweigart/pyautogui/master/docs/keyboard.rst]
@@ -55,7 +55,7 @@ Here are some directions based on your past behavior to make you better:
13. Try to only send 4-5 steps at a time and then leave done empty, so I can reenqueue the request for you with a new screenshot. This is very important! Without new screenshots you generally do not perform well.
14. pyautogui.press("enter") is not the same as pyautogui.write("\n") - please do not interchange them. You keep doing that.
15. Try going to links directly instead of searching for them. This is very important.
-16. Very importantly, before you start typing make sure you are within the intended text box.
+16. Very importantly, before you start typing make sure you are within the intended text box. Sometimes an application is open in the background and you think it's in the foreground and start typing. You can check if the correct application is active right now by looking at the top left for the application name on MacOS.
17. Try not switching applications with keyboard shortcuts, except always launch applications with spotlight.

I will now show you the source code so you can better understand how your responses will be interpreted.
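To make the response format the prompt above describes concrete, here is a hypothetical instance written as a Python literal; the field names come from context.txt, while the specific function name and values are invented for illustration:

```python
# Hypothetical example of the response shape context.txt asks the model for.
example_response = {
    "steps": [
        {
            "function": "write",  # invented executor function name, for illustration only
            "parameters": {"string": "hello world"},
            "human_readable_justification": "Typing the requested text into the focused field",
        }
    ],
    "done": None,  # null in the model's JSON until the task is complete, then a string
}
```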
62 changes: 0 additions & 62 deletions pyinstaller-build.py

This file was deleted.
