diff --git a/dependency/llama.cpp b/dependency/llama.cpp
index fe8c7b45..80c0f3dd 160000
--- a/dependency/llama.cpp
+++ b/dependency/llama.cpp
@@ -1 +1 @@
-Subproject commit fe8c7b45fd5eca1c38a09c257ebf8cf1ccae3a4a
+Subproject commit 80c0f3dd611f6d12c84a0860460ec8cb7904dff4
diff --git a/nexa/__init__.py b/nexa/__init__.py
index c2582443..488bc026 100644
--- a/nexa/__init__.py
+++ b/nexa/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.9.3"
+__version__ = "0.0.9.4"
diff --git a/nexa/gguf/nexa_inference_vlm_omni.py b/nexa/gguf/nexa_inference_vlm_omni.py
index 80a60bb2..021f79e7 100644
--- a/nexa/gguf/nexa_inference_vlm_omni.py
+++ b/nexa/gguf/nexa_inference_vlm_omni.py
@@ -149,9 +149,6 @@ def run(self):
 
     def inference(self, prompt: str, image_path: str):
         with suppress_stdout_stderr():
-            if prompt and prompt[0].islower():
-                prompt = prompt[0].upper() + prompt[1:]
-
             prompt = ctypes.c_char_p(prompt.encode("utf-8"))
             image_path = ctypes.c_char_p(image_path.encode("utf-8"))
             response = omni_vlm_cpp.omnivlm_inference(prompt, image_path)