diff --git a/.env b/.env
new file mode 100644
index 00000000..e033553a
--- /dev/null
+++ b/.env
@@ -0,0 +1,18 @@
+# settings for OpenAI
+OPENAI_API_BASE=https://api.openai.com/v1
+OPENAI_API_KEY=
+OPENAI_CHAT_MODEL=gpt-3.5-turbo
+OPENAI_EMBEDDINGS_MODEL=text-embedding-ada-002
+
+# settings for Azure OpenAI
+AZURE_OPENAI_ENDPOINT=
+AZURE_OPENAI_API_KEY=
+OPENAI_API_VERSION=2024-02-15-preview
+AZURE_OPENAI_CHAT_DEPLOYMENT=gpt-35-turbo
+AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002
+
+# settings for Cohere
+COHERE_API_KEY=
+
+# settings for local models
+LOCAL_MODEL=
diff --git a/.env.secret b/.env.secret
deleted file mode 100644
index d4e172f8..00000000
Binary files a/.env.secret and /dev/null differ
diff --git a/.gitignore b/.gitignore
index 47b78c70..01142788 100644
--- a/.gitignore
+++ b/.gitignore
@@ -332,7 +332,6 @@ celerybeat.pid
 *.sage.py
 
 # Environments
-.env
 .venv
 env/
 venv/
@@ -457,7 +456,6 @@ $RECYCLE.BIN/
 logs/
 .gitsecret/keys/random_seed
 !*.secret
-.env
 .envrc
 S.gpg-agent*
 
@@ -467,4 +465,4 @@ storage/*
 
 # Conda and env storages
 *install_dir/
-doc_env
+doc_env/
diff --git a/.gitsecret/keys/pubring.kbx b/.gitsecret/keys/pubring.kbx
deleted file mode 100644
index 55935eed..00000000
Binary files a/.gitsecret/keys/pubring.kbx and /dev/null differ
diff --git a/.gitsecret/keys/trustdb.gpg b/.gitsecret/keys/trustdb.gpg
deleted file mode 100644
index e599a3ba..00000000
Binary files a/.gitsecret/keys/trustdb.gpg and /dev/null differ
diff --git a/.gitsecret/paths/mapping.cfg b/.gitsecret/paths/mapping.cfg
deleted file mode 100644
index ae4cf293..00000000
--- a/.gitsecret/paths/mapping.cfg
+++ /dev/null
@@ -1 +0,0 @@
-.env:555d804179d7207ad6784a84afb88d2ec44f90ea3b7a061d0e38f9dd53fe7211
diff --git a/libs/kotaemon/kotaemon/embeddings/langchain_based.py b/libs/kotaemon/kotaemon/embeddings/langchain_based.py
index 9bd0e7b5..14cb2a82 100644
--- a/libs/kotaemon/kotaemon/embeddings/langchain_based.py
+++ b/libs/kotaemon/kotaemon/embeddings/langchain_based.py
@@ -137,14 +137,14 @@ def __init__(
         azure_endpoint: Optional[str] = None,
         deployment: Optional[str] = None,
         openai_api_key: Optional[str] = None,
-        openai_api_version: Optional[str] = None,
+        api_version: Optional[str] = None,
         request_timeout: Optional[float] = None,
         **params,
     ):
         super().__init__(
             azure_endpoint=azure_endpoint,
             deployment=deployment,
-            openai_api_version=openai_api_version,
+            api_version=api_version,
             openai_api_key=openai_api_key,
             request_timeout=request_timeout,
             **params,
diff --git a/libs/kotaemon/kotaemon/llms/__init__.py b/libs/kotaemon/kotaemon/llms/__init__.py
index 4e81d21e..d7547a67 100644
--- a/libs/kotaemon/kotaemon/llms/__init__.py
+++ b/libs/kotaemon/kotaemon/llms/__init__.py
@@ -2,7 +2,7 @@
 
 from .base import BaseLLM
 from .branching import GatedBranchingPipeline, SimpleBranchingPipeline
-from .chats import AzureChatOpenAI, ChatLLM, EndpointChatLLM, LlamaCppChat
+from .chats import AzureChatOpenAI, ChatLLM, ChatOpenAI, EndpointChatLLM, LlamaCppChat
 from .completions import LLM, AzureOpenAI, LlamaCpp, OpenAI
 from .cot import ManualSequentialChainOfThought, Thought
 from .linear import GatedLinearPipeline, SimpleLinearPipeline
@@ -17,6 +17,7 @@
     "HumanMessage",
     "AIMessage",
     "SystemMessage",
+    "ChatOpenAI",
     "AzureChatOpenAI",
     "LlamaCppChat",
     # completion-specific components
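The committed `.env` template above pairs with `python-decouple`, the reader this changeset standardizes on (see `scripts/serve_local.py` further down). A minimal sketch of how the new keys are read; the fallback values here are illustrative assumptions, not part of the patch:

```python
# Reading the new .env keys with python-decouple; defaults are illustrative.
from decouple import config

openai_api_base = config("OPENAI_API_BASE", default="https://api.openai.com/v1")
openai_api_key = config("OPENAI_API_KEY", default="")
chat_model = config("OPENAI_CHAT_MODEL", default="gpt-3.5-turbo")

if not openai_api_key:
    # The committed .env ships with empty keys; fill them in before launch.
    print("OPENAI_API_KEY is empty; set it in .env")
```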
diff --git a/libs/kotaemon/kotaemon/llms/chats/__init__.py b/libs/kotaemon/kotaemon/llms/chats/__init__.py
index 53d44b2b..5b503176 100644
--- a/libs/kotaemon/kotaemon/llms/chats/__init__.py
+++ b/libs/kotaemon/kotaemon/llms/chats/__init__.py
@@ -1,11 +1,12 @@
 from .base import ChatLLM
 from .endpoint_based import EndpointChatLLM
-from .langchain_based import AzureChatOpenAI, LCChatMixin
+from .langchain_based import AzureChatOpenAI, ChatOpenAI, LCChatMixin
 from .llamacpp import LlamaCppChat
 
 __all__ = [
     "ChatLLM",
     "EndpointChatLLM",
+    "ChatOpenAI",
     "AzureChatOpenAI",
     "LCChatMixin",
     "LlamaCppChat",
diff --git a/libs/kotaemon/kotaemon/llms/chats/langchain_based.py b/libs/kotaemon/kotaemon/llms/chats/langchain_based.py
index 14064bae..6c87c720 100644
--- a/libs/kotaemon/kotaemon/llms/chats/langchain_based.py
+++ b/libs/kotaemon/kotaemon/llms/chats/langchain_based.py
@@ -165,7 +165,36 @@ def specs(self, path: str):
         raise ValueError(f"Invalid param {path}")
 
 
+class ChatOpenAI(LCChatMixin, ChatLLM):  # type: ignore
+    def __init__(
+        self,
+        openai_api_base: str | None = None,
+        openai_api_key: str | None = None,
+        model: str | None = None,
+        temperature: float = 0.7,
+        request_timeout: float | None = None,
+        **params,
+    ):
+        super().__init__(
+            openai_api_base=openai_api_base,
+            openai_api_key=openai_api_key,
+            model=model,
+            temperature=temperature,
+            request_timeout=request_timeout,
+            **params,
+        )
+
+    def _get_lc_class(self):
+        try:
+            from langchain_openai import ChatOpenAI
+        except ImportError:
+            from langchain.chat_models import ChatOpenAI
+
+        return ChatOpenAI
+
+
 class AzureChatOpenAI(LCChatMixin, ChatLLM):  # type: ignore
+
     def __init__(
         self,
         azure_endpoint: str | None = None,
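A hypothetical smoke test for the `ChatOpenAI` wrapper introduced above, assuming `kotaemon` is importable; the key is a placeholder, and treating the component as a callable that returns an object with a `text` attribute is an assumption about `LCChatMixin`'s interface, not something this patch shows:

```python
# Hypothetical usage of the new ChatOpenAI wrapper; values are placeholders.
from kotaemon.llms import ChatOpenAI

llm = ChatOpenAI(
    openai_api_base="https://api.openai.com/v1",
    openai_api_key="sk-...",  # placeholder, not a real key
    model="gpt-3.5-turbo",
    temperature=0,
)

# _get_lc_class prefers the langchain-openai package and falls back to the
# legacy langchain.chat_models import when it is not installed.
response = llm("Hello!")
print(response.text)
```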
diff --git a/libs/ktem/flowsettings.py b/libs/ktem/flowsettings.py
index 52ebf863..a3589fec 100644
--- a/libs/ktem/flowsettings.py
+++ b/libs/ktem/flowsettings.py
@@ -31,70 +31,98 @@
     "__type__": "kotaemon.storages.ChromaVectorStore",
     "path": str(user_cache_dir / "vectorstore"),
 }
-KH_LLMS = {
-    # example for using Azure OpenAI, the config variables can set as environment
-    # variables or in the .env file
-    # "gpt4": {
-    #     "def": {
-    #         "__type__": "kotaemon.llms.AzureChatOpenAI",
-    #         "temperature": 0,
-    #         "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
-    #         "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
-    #         "openai_api_version": config("OPENAI_API_VERSION", default=""),
-    #         "deployment_name": "",
-    #         "stream": True,
-    #     },
-    #     "accuracy": 10,
-    #     "cost": 10,
-    #     "default": False,
-    # },
-    # "gpt35": {
-    #     "def": {
-    #         "__type__": "kotaemon.llms.AzureChatOpenAI",
-    #         "temperature": 0,
-    #         "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
-    #         "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
-    #         "openai_api_version": config("OPENAI_API_VERSION", default=""),
-    #         "deployment_name": "",
-    #         "request_timeout": 10,
-    #         "stream": False,
-    #     },
-    #     "accuracy": 5,
-    #     "cost": 5,
-    #     "default": False,
-    # },
-    "local": {
+KH_LLMS = {}
+KH_EMBEDDINGS = {}
+
+# populate options from config
+if config("AZURE_OPENAI_API_KEY", default="") and config(
+    "AZURE_OPENAI_ENDPOINT", default=""
+):
+    if config("AZURE_OPENAI_CHAT_DEPLOYMENT", default=""):
+        KH_LLMS["azure"] = {
+            "def": {
+                "__type__": "kotaemon.llms.AzureChatOpenAI",
+                "temperature": 0,
+                "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
+                "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
+                "api_version": config("OPENAI_API_VERSION", default="")
+                or "2024-02-15-preview",
+                "deployment_name": config("AZURE_OPENAI_CHAT_DEPLOYMENT", default=""),
+                "request_timeout": 10,
+                "stream": False,
+            },
+            "default": False,
+            "accuracy": 5,
+            "cost": 5,
+        }
+    if config("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""):
+        KH_EMBEDDINGS["azure"] = {
+            "def": {
+                "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
+                "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
+                "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
+                "api_version": config("OPENAI_API_VERSION", default="")
+                or "2024-02-15-preview",
+                "deployment": config("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""),
+                "request_timeout": 10,
+                "chunk_size": 16,
+            },
+            "default": False,
+            "accuracy": 5,
+            "cost": 5,
+        }
+
+if config("OPENAI_API_KEY", default=""):
+    KH_LLMS["openai"] = {
         "def": {
-            "__type__": "kotaemon.llms.EndpointChatLLM",
-            "endpoint_url": "http://localhost:31415/v1/chat/completions",
+            "__type__": "kotaemon.llms.ChatOpenAI",
+            "temperature": 0,
+            "openai_api_base": config("OPENAI_API_BASE", default="")
+            or "https://api.openai.com/v1",
+            "openai_api_key": config("OPENAI_API_KEY", default=""),
+            "model": config("OPENAI_CHAT_MODEL", default="") or "gpt-3.5-turbo",
+            "request_timeout": 10,
+            "stream": False,
         },
         "default": False,
-    },
-}
-KH_EMBEDDINGS = {
-    # example for using Azure OpenAI, the config variables can set as environment
-    # variables or in the .env file
-    # "ada": {
-    #     "def": {
-    #         "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
-    #         "model": "text-embedding-ada-002",
-    #         "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
-    #         "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
-    #         "deployment": "",
-    #         "chunk_size": 16,
-    #     },
-    #     "accuracy": 5,
-    #     "cost": 5,
-    #     "default": True,
-    # },
-    "local": {
+    }
+    if len(KH_EMBEDDINGS) < 1:
+        KH_EMBEDDINGS["openai"] = {
+            "def": {
+                "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
+                "openai_api_base": config("OPENAI_API_BASE", default="")
+                or "https://api.openai.com/v1",
+                "openai_api_key": config("OPENAI_API_KEY", default=""),
+                "model": config(
+                    "OPENAI_EMBEDDINGS_MODEL", default="text-embedding-ada-002"
+                )
+                or "text-embedding-ada-002",
+                "request_timeout": 10,
+                "chunk_size": 16,
+            },
+            "default": False,
+        }
+
+if config("LOCAL_MODEL", default=""):
+    KH_LLMS["local"] = {
         "def": {
-            "__type__": "kotaemon.embeddings.EndpointEmbeddings",
-            "endpoint_url": "http://localhost:31415/v1/embeddings",
+            "__type__": "kotaemon.llms.EndpointChatLLM",
+            "endpoint_url": "http://localhost:31415/v1/chat/completions",
         },
         "default": False,
-    },
-}
+        "cost": 0,
+    }
+    if len(KH_EMBEDDINGS) < 1:
+        KH_EMBEDDINGS["local"] = {
+            "def": {
+                "__type__": "kotaemon.embeddings.EndpointEmbeddings",
+                "endpoint_url": "http://localhost:31415/v1/embeddings",
+            },
+            "default": False,
+            "cost": 0,
+        }
+
+
 KH_REASONINGS = ["ktem.reasoning.simple.FullQAPipeline"]
diff --git a/libs/ktem/khapptests/__init__.py b/libs/ktem/ktem_tests/__init__.py
similarity index 100%
rename from libs/ktem/khapptests/__init__.py
rename to libs/ktem/ktem_tests/__init__.py
diff --git a/libs/ktem/khapptests/resources/embedding_openai.json b/libs/ktem/ktem_tests/resources/embedding_openai.json
similarity index 100%
rename from libs/ktem/khapptests/resources/embedding_openai.json
rename to libs/ktem/ktem_tests/resources/embedding_openai.json
diff --git a/libs/ktem/khapptests/test_qa.py b/libs/ktem/ktem_tests/test_qa.py
similarity index 100%
rename from libs/ktem/khapptests/test_qa.py
rename to libs/ktem/ktem_tests/test_qa.py
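Note the `len(KH_EMBEDDINGS) < 1` guards in the flowsettings hunk above: embeddings providers register with a first-wins precedence of Azure, then OpenAI, then the local endpoint. A standalone sketch of that rule (`pick_embeddings` is a hypothetical helper, not part of the codebase):

```python
# Standalone sketch of the "first configured provider wins" rule used for
# KH_EMBEDDINGS above; pick_embeddings is a hypothetical helper.
def pick_embeddings(azure_deployment: str, openai_key: str, local_model: str) -> dict:
    registry: dict = {}
    if azure_deployment:
        registry["azure"] = {"__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings"}
    if openai_key and len(registry) < 1:
        registry["openai"] = {"__type__": "kotaemon.embeddings.OpenAIEmbeddings"}
    if local_model and len(registry) < 1:
        registry["local"] = {"__type__": "kotaemon.embeddings.EndpointEmbeddings"}
    return registry


# Azure takes precedence even when OpenAI is also configured.
assert list(pick_embeddings("ada-002", "sk-...", "model.gguf")) == ["azure"]
# Without Azure, OpenAI fills the slot and the local fallback is skipped.
assert list(pick_embeddings("", "sk-...", "model.gguf")) == ["openai"]
```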
diff --git a/scripts/serve_local.py b/scripts/serve_local.py
index 61b8f778..f1f4831f 100644
--- a/scripts/serve_local.py
+++ b/scripts/serve_local.py
@@ -3,9 +3,7 @@
 from inspect import currentframe, getframeinfo
 from pathlib import Path
 
-import dotenv
-
-configs = dotenv.dotenv_values(".env")
+from decouple import config
 
 system_name = platform.system()
 
@@ -53,7 +51,7 @@ def guess_chat_format(local_model_file):
 
 
 def main():
-    local_model_file = configs.get("LOCAL_MODEL", "")
+    local_model_file = config("LOCAL_MODEL", default="")
 
     if not local_model_file:
         print("LOCAL_MODEL not set in the `.env` file.")
diff --git a/scripts/server_llamacpp_linux.sh b/scripts/server_llamacpp_linux.sh
index f72ccde5..a45e670a 100755
--- a/scripts/server_llamacpp_linux.sh
+++ b/scripts/server_llamacpp_linux.sh
@@ -87,7 +87,7 @@ activate_environment
 
 # install dependencies
 # ver 0.2.56 produces segment error for /embeddings on MacOS
-python -m pip install llama-cpp-python[server]!=0.2.56
+python -m pip install llama-cpp-python[server]==0.2.55
 
 # start the server with passed params
 python -m llama_cpp.server $@
diff --git a/scripts/server_llamacpp_macos.sh b/scripts/server_llamacpp_macos.sh
index 4ed9ac2a..13d0784e 100755
--- a/scripts/server_llamacpp_macos.sh
+++ b/scripts/server_llamacpp_macos.sh
@@ -88,7 +88,7 @@ activate_environment
 
 # install dependencies
 # ver 0.2.56 produces segment error for /embeddings on MacOS
-python -m pip install llama-cpp-python[server]!=0.2.56
+python -m pip install llama-cpp-python[server]==0.2.55
 
 # start the server with passed params
 python -m llama_cpp.server $@
diff --git a/scripts/server_llamacpp_windows.bat b/scripts/server_llamacpp_windows.bat
index 48779dbe..97c12924 100644
--- a/scripts/server_llamacpp_windows.bat
+++ b/scripts/server_llamacpp_windows.bat
@@ -28,7 +28,7 @@ call :activate_environment
 
 @rem install dependencies
 @rem ver 0.2.56 produces segment error for /embeddings on MacOS
-call python -m pip install llama-cpp-python[server]!=0.2.56
+call python -m pip install llama-cpp-python[server]==0.2.55
 
 @REM @rem start the server with passed params
 call python -m llama_cpp.server %*
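All three scripts pin `llama-cpp-python` to 0.2.55 and still forward their arguments to `llama_cpp.server`. A minimal client sketch against that server, assuming it was started with `--port 31415` so the URL matches the `endpoint_url` registered in `flowsettings.py`:

```python
# Minimal client for the local llama.cpp server started by the scripts above;
# assumes `--port 31415` was passed so the URL matches flowsettings.py.
import json
from urllib.request import Request, urlopen

payload = {"messages": [{"role": "user", "content": "Hello"}]}
req = Request(
    "http://localhost:31415/v1/chat/completions",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
with urlopen(req) as resp:
    print(json.load(resp)["choices"][0]["message"]["content"])
```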