diff --git a/helm/h2ogpt-chart/Chart.yaml b/helm/h2ogpt-chart/Chart.yaml index d90a7d69e..5a597ed84 100644 --- a/helm/h2ogpt-chart/Chart.yaml +++ b/helm/h2ogpt-chart/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: h2ogpt -description: A Helm chart for h2ogpt +description: A Helm chart for h2oGPT # A chart can be either an 'application' or a 'library' chart. # @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0-288 +version: 0.2.1-1254 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: 0.1.0-288 +appVersion: 0.2.1-1254 diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md new file mode 100644 index 000000000..b4b6bc94b --- /dev/null +++ b/helm/h2ogpt-chart/README.md @@ -0,0 +1,148 @@ +# h2ogpt + +![Version: 0.2.1-1254](https://img.shields.io/badge/Version-0.2.1--1254-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.2.1-1254](https://img.shields.io/badge/AppVersion-0.2.1--1254-informational?style=flat-square) + +A Helm chart for h2oGPT + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| agent.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. 
| +| agent.agent_workers | int | `5` | | +| agent.autoscaling.enabled | bool | `false` | | +| agent.autoscaling.maxReplicas | int | `2` | | +| agent.autoscaling.minReplicas | int | `1` | | +| agent.autoscaling.targetCPU | int | `80` | | +| agent.autoscaling.targetMemory | string | `"32Gi"` | | +| agent.enabled | bool | `true` | Enable agent, this must be `false` if `h2ogpt.agent.enabled` is `true` | +| agent.env | object | `{}` | | +| agent.extraVolumeMounts | list | `[]` | Extra volume mounts | +| agent.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs | +| agent.image.pullPolicy | string | `"IfNotPresent"` | | +| agent.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` | | +| agent.image.tag | string | `nil` | | +| agent.imagePullSecrets | string | `nil` | | +| agent.initImage.pullPolicy | string | `nil` | | +| agent.initImage.repository | string | `nil` | | +| agent.initImage.tag | string | `nil` | | +| agent.nodeSelector | object | `{}` | Node selector for the agent pods. | +| agent.overrideConfig | object | `{}` | Supported configs are commented. If you don't pass any value, keep {} | +| agent.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| agent.podAnnotations | object | `{}` | | +| agent.podLabels | object | `{}` | | +| agent.podSecurityContext.fsGroup | string | `nil` | | +| agent.podSecurityContext.runAsGroup | string | `nil` | | +| agent.podSecurityContext.runAsNonRoot | bool | `true` | | +| agent.podSecurityContext.runAsUser | string | `nil` | | +| agent.replicaCount | int | `1` | | +| agent.resources.limits."nvidia.com/gpu" | int | `1` | | +| agent.resources.limits.memory | string | `"64Gi"` | | +| agent.resources.requests."nvidia.com/gpu" | int | `1` | | +| agent.resources.requests.memory | string | `"32Gi"` | | +| agent.securityContext.allowPrivilegeEscalation | bool | `false` | | +| agent.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| agent.securityContext.runAsNonRoot | bool | `true` | | +| agent.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | +| agent.service.agentPort | int | `5004` | | +| agent.service.annotations | object | `{}` | | +| agent.service.type | string | `"NodePort"` | | +| agent.storage.class | string | `nil` | | +| agent.storage.size | string | `"128Gi"` | | +| agent.storage.useEphemeral | bool | `true` | | +| agent.tolerations | list | `[]` | Node taints to tolerate by the agent pods. | +| agent.updateStrategy.type | string | `"RollingUpdate"` | | +| caCertificates | string | `""` | CA certs | +| fullnameOverride | string | `""` | | +| global.externalLLM.enabled | bool | `false` | | +| global.externalLLM.modelLock | string | `nil` | | +| global.externalLLM.secret | object | `{}` | list of secrets for h2ogpt and agent env | +| global.visionModels.enabled | bool | `false` | Enable vision models | +| global.visionModels.rotateAlignResizeImage | bool | `false` | | +| global.visionModels.visibleModels | list | `[]` | Visible vision models, the vision model itself needs to be set via modelLock or base_model. 
Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] | +| h2ogpt.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. | +| h2ogpt.agent | object | `{"agent_workers":5,"enabled":false}` | Enable agent | +| h2ogpt.agent.enabled | bool | `false` | Run agent with h2oGPT container | +| h2ogpt.enabled | bool | `true` | Enable h2oGPT | +| h2ogpt.env | object | `{}` | | +| h2ogpt.extraVolumeMounts | list | `[]` | Extra volume mounts | +| h2ogpt.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs | +| h2ogpt.image.pullPolicy | string | `"IfNotPresent"` | | +| h2ogpt.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` | | +| h2ogpt.image.tag | string | `nil` | | +| h2ogpt.imagePullSecrets | string | `nil` | | +| h2ogpt.initImage.pullPolicy | string | `nil` | | +| h2ogpt.initImage.repository | string | `nil` | | +| h2ogpt.initImage.tag | string | `nil` | | +| h2ogpt.nodeSelector | object | `{}` | Node selector for the h2ogpt pods. | +| h2ogpt.openai.enabled | bool | `true` | | +| h2ogpt.openai.openai_workers | int | `5` | | +| h2ogpt.overrideConfig | object | `{}` | Supported configs are commented. If you don't pass any value, keep {} | +| h2ogpt.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| +| h2ogpt.podAnnotations | object | `{}` | | +| h2ogpt.podLabels | object | `{}` | | +| h2ogpt.podSecurityContext.fsGroup | string | `nil` | | +| h2ogpt.podSecurityContext.runAsGroup | string | `nil` | | +| h2ogpt.podSecurityContext.runAsNonRoot | bool | `true` | | +| h2ogpt.podSecurityContext.runAsUser | string | `nil` | | +| h2ogpt.replicaCount | int | `1` | | +| h2ogpt.resources.limits."nvidia.com/gpu" | int | `0` | | +| h2ogpt.resources.limits.memory | string | `"64Gi"` | | +| h2ogpt.resources.requests."nvidia.com/gpu" | int | `0` | | +| h2ogpt.resources.requests.memory | string | `"32Gi"` | | +| h2ogpt.securityContext.allowPrivilegeEscalation | bool | `false` | | +| h2ogpt.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| h2ogpt.securityContext.runAsNonRoot | bool | `true` | | +| h2ogpt.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | +| h2ogpt.service.agentPort | int | `5004` | | +| h2ogpt.service.functionPort | int | `5002` | | +| h2ogpt.service.openaiPort | int | `5000` | | +| h2ogpt.service.type | string | `"NodePort"` | | +| h2ogpt.service.webPort | int | `80` | | +| h2ogpt.service.webServiceAnnotations | object | `{}` | | +| h2ogpt.storage.class | string | `nil` | | +| h2ogpt.storage.size | string | `"128Gi"` | | +| h2ogpt.storage.useEphemeral | bool | `true` | | +| h2ogpt.tolerations | list | `[]` | Node taints to tolerate by the h2ogpt pods. 
| +| h2ogpt.updateStrategy.type | string | `"RollingUpdate"` | | +| nameOverride | string | `""` | | +| namespaceOverride | string | `""` | | +| vllm.containerArgs[0] | string | `"--model"` | | +| vllm.containerArgs[1] | string | `"h2oai/h2ogpt-4096-llama2-7b-chat"` | | +| vllm.containerArgs[2] | string | `"--tokenizer"` | | +| vllm.containerArgs[3] | string | `"hf-internal-testing/llama-tokenizer"` | | +| vllm.containerArgs[4] | string | `"--tensor-parallel-size"` | | +| vllm.containerArgs[5] | int | `2` | | +| vllm.containerArgs[6] | string | `"--seed"` | | +| vllm.containerArgs[7] | int | `1234` | | +| vllm.containerArgs[8] | string | `"--trust-remote-code"` | | +| vllm.enabled | bool | `false` | Enable vllm | +| vllm.env.DO_NOT_TRACK | string | `"1"` | | +| vllm.env.VLLM_NO_USAGE_STATS | string | `"1"` | | +| vllm.image.pullPolicy | string | `"IfNotPresent"` | | +| vllm.image.repository | string | `"vllm/vllm-openai"` | | +| vllm.image.tag | string | `"latest"` | | +| vllm.imagePullSecrets | string | `nil` | | +| vllm.nodeSelector | string | `nil` | | +| vllm.overrideConfig | string | `nil` | | +| vllm.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| +| vllm.podAnnotations | object | `{}` | | +| vllm.podLabels | object | `{}` | | +| vllm.podSecurityContext.fsGroup | string | `nil` | | +| vllm.podSecurityContext.runAsGroup | string | `nil` | | +| vllm.podSecurityContext.runAsNonRoot | bool | `true` | | +| vllm.podSecurityContext.runAsUser | string | `nil` | | +| vllm.replicaCount | int | `1` | | +| vllm.resources | string | `nil` | | +| vllm.securityContext.allowPrivilegeEscalation | bool | `false` | | +| vllm.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| vllm.securityContext.runAsNonRoot | bool | `true` | | +| vllm.securityContext.seccompProfile | string | `nil` | | +| vllm.service.port | int | `5000` | | +| vllm.service.type | string | `"ClusterIP"` | | +| vllm.storage.class | string | `nil` | | +| vllm.storage.size | string | `"512Gi"` | | +| vllm.storage.useEphemeral | bool | `true` | | +| vllm.tolerations | string | `nil` | | +| vllm.updateStrategy.type | string | `"RollingUpdate"` | | + diff --git a/helm/h2ogpt-chart/templates/NOTES.txt b/helm/h2ogpt-chart/templates/NOTES.txt new file mode 100644 index 000000000..c32a7790f --- /dev/null +++ b/helm/h2ogpt-chart/templates/NOTES.txt @@ -0,0 +1,8 @@ +Thank you for installing {{ .Chart.Name }}. + +Your release is named {{ .Release.Name }}. 
+ +To learn more about the release, try: + + $ helm status {{ .Release.Name }} + $ helm get all {{ .Release.Name }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index a8352a4ad..61e2168dd 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -68,3 +68,131 @@ Create the name of the service account to use {{- default "default" .Values.serviceAccount.name }} {{- end }} {{- end }} + +{{/* +Config for h2oGPT +*/}} + +{{- define "h2ogpt.config" -}} +{{- with .Values.h2ogpt }} +verbose: {{ default "True" ( .overrideConfig.verbose | quote ) }} +{{- if .overrideConfig.heap_app_id }} +heap_app_id: {{ .overrideConfig.heap_app_id }} +{{- end }} +num_async: {{ default 10 .overrideConfig.num_async }} +save_dir: {{ default "/docker_logs" .overrideConfig.save_dir }} +score_model: {{ default "None" .overrideConfig.score_model }} +share: {{ default "False" (.overrideConfig.share | quote ) }} +enforce_h2ogpt_api_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_api_key | quote ) }} +enforce_h2ogpt_ui_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_ui_key | quote ) }} +{{- if .overrideConfig.h2ogpt_api_keys }} +h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} +{{- end }} +{{- if .overrideConfig.use_auth_token }} +use_auth_token: {{ .overrideConfig.use_auth_token }} +{{- end }} +visible_models: {{ default "['meta-llama/Meta-Llama-3.1-8B-Instruct']" .overrideConfig.visible_models }} +{{/*visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }}*/}} +top_k_docs_max_show: {{ default 100 .overrideConfig.top_k_docs_max_show }} +{{- if .overrideConfig.admin_pass }} +admin_pass: {{ .overrideConfig.admin_pass }} +{{- end }} +{{- if .openai.enabled }} +openai_server: "True" +openai_port: 5000 +openai_workers: {{ default 5 .openai.openai_workers }} +{{- end }} +{{- if .agent.enabled }} 
+agent_server: "True" +agent_port: 5004 +agent_workers: {{ .agent.agent_workers }} +{{- end }} +function_server: {{ default "True" ( .overrideConfig.function_server | quote ) }} +function_port: 5002 +function_server_workers: {{ default 1 .overrideConfig.function_server_workers }} +multiple_workers_gunicorn: {{ default "True" ( .overrideConfig.multiple_workers_gunicorn | quote ) }} +llava_model: {{ default "openai:mistralai/Pixtral-12B-2409" .overrideConfig.llava_model }} +enable_llava: {{ default "True" ( .overrideConfig.enable_llava | quote ) }} +{{- if ge (int (index .resources.requests "nvidia.com/gpu") ) (int 1) }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "True" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "True" ( .overrideConfig.enable_transcriptions | quote ) }} +asr_model: {{ default "distil-whisper/distil-large-v3" .overrideConfig.asr_model }} +pre_load_embedding_model: {{ default "True" (.overrideConfig.pre_load_embedding_model | quote ) }} +pre_load_image_audio_models: {{ default "True" ( .overrideConfig.pre_load_image_audio_models | quote ) }} +cut_distance: {{ default 10000 .overrideConfig.cut_distance }} +hf_embedding_model: {{ default "BAAI/bge-large-en-v1.5" .overrideConfig.hf_embedding_model }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "True" ( .overrideConfig.enable_doctr | quote ) }} +{{- else }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "False" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "False" ( .overrideConfig.enable_transcriptions | quote ) }} +embedding_gpu_id: {{ default "cpu" .overrideConfig.embedding_gpu_id }} +hf_embedding_model: {{ default "fake" .overrideConfig.hf_embedding_model }} +pre_load_embedding_model: {{ default "False" ( .overrideConfig.pre_load_embedding_model | quote ) }} 
+pre_load_image_audio_models: {{ default "False" ( .overrideConfig.pre_load_image_audio_models | quote ) }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "False" ( .overrideConfig.enable_doctr | quote ) }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Config for agent +*/}} + +{{- define "agent.config" -}} +{{- with .Values.agent }} +verbose: {{ default "True" ( .overrideConfig.verbose | quote ) }} +{{- if .overrideConfig.heap_app_id }} +heap_app_id: {{ .overrideConfig.heap_app_id }} +{{- end }} +num_async: {{ default 10 .overrideConfig.num_async }} +save_dir: {{ default "/docker_logs" .overrideConfig.save_dir }} +score_model: {{ default "None" .overrideConfig.score_model }} +share: {{ default "False" (.overrideConfig.share | quote ) }} +enforce_h2ogpt_api_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_api_key | quote ) }} +enforce_h2ogpt_ui_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_ui_key | quote ) }} +{{- if .overrideConfig.h2ogpt_api_keys }} +h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} +{{- end }} +{{- if .overrideConfig.use_auth_token }} +use_auth_token: {{ .overrideConfig.use_auth_token }} +{{- end }} +visible_models: {{ default "['meta-llama/Meta-Llama-3.1-8B-Instruct']" .overrideConfig.visible_models }} +{{/*visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }}*/}} +top_k_docs_max_show: {{ default 100 .overrideConfig.top_k_docs_max_show }} +{{- if .overrideConfig.admin_pass }} +admin_pass: {{ .overrideConfig.admin_pass }} +{{- end }} +agent_server: "True" +agent_port: 5004 +agent_workers: {{ default 5 .agent_workers }} +multiple_workers_gunicorn: {{ default "True" ( .overrideConfig.multiple_workers_gunicorn | quote ) }} +llava_model: {{ default "openai:mistralai/Pixtral-12B-2409" .overrideConfig.llava_model }} +enable_llava: {{ default "True" ( .overrideConfig.enable_llava | quote ) }} +{{- if ge (int (index 
.resources.requests "nvidia.com/gpu") ) (int 1) }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "True" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "True" ( .overrideConfig.enable_transcriptions | quote ) }} +asr_model: {{ default "distil-whisper/distil-large-v3" .overrideConfig.asr_model }} +pre_load_embedding_model: {{ default "True" (.overrideConfig.pre_load_embedding_model | quote ) }} +pre_load_image_audio_models: {{ default "True" ( .overrideConfig.pre_load_image_audio_models | quote ) }} +cut_distance: {{ default 10000 .overrideConfig.cut_distance }} +hf_embedding_model: {{ default "BAAI/bge-large-en-v1.5" .overrideConfig.hf_embedding_model }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "True" ( .overrideConfig.enable_doctr | quote ) }} +{{- else }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "False" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "False" ( .overrideConfig.enable_transcriptions | quote ) }} +embedding_gpu_id: {{ default "cpu" .overrideConfig.embedding_gpu_id }} +hf_embedding_model: {{ default "fake" .overrideConfig.hf_embedding_model }} +pre_load_embedding_model: {{ default "False" ( .overrideConfig.pre_load_embedding_model | quote ) }} +pre_load_image_audio_models: {{ default "False" ( .overrideConfig.pre_load_image_audio_models | quote ) }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "False" ( .overrideConfig.enable_doctr | quote ) }} +{{- end }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-configmap.yaml b/helm/h2ogpt-chart/templates/agents-configmap.yaml new file mode 100644 index 000000000..b6fa6e51e --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-configmap.yaml @@ -0,0 +1,26 @@ +{{- if .Values.agent.enabled 
}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-agent-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := ( include "agent.config" . | fromYaml ) }} +{{- /* convert boolean value to cli compatibility */}} + {{- if or ( eq "true" ( $value | toString )) ( eq "false" ( $value | toString )) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} +{{- end }} +{{- range $key, $value := ( .Values.agent.additionalConfig ) }} +{{- /* convert boolean value to cli compatibility */}} + {{- if or ( eq "true" ( $value | toString )) ( eq "false" ( $value | toString )) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml new file mode 100644 index 000000000..ac737a792 --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -0,0 +1,167 @@ +{{- if .Values.agent.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-agent + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }}-agent +spec: + replicas: {{ .Values.agent.replicaCount }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-agent + {{- if .Values.agent.updateStrategy }} + strategy: {{- toYaml .Values.agent.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.agent.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }}-agent + {{- with .Values.agent.podLabels }} + {{ toYaml . 
| nindent 8 }} + {{- end }} + spec: + {{- with .Values.agent.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.agent.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.agent.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.agent.podAffinity }} + podAntiAffinity: + {{- if .Values.agent.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.agent.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.agent.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.agent.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . 
}}-agent + securityContext: + {{- toYaml .Values.agent.securityContext | nindent 12 }} + image: "{{ .Values.agent.image.repository }}:{{ .Values.agent.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.agent.image.pullPolicy }} + command: ["/bin/bash", "-c"] + args: + - > + python3 /workspace/generate.py + ports: + - name: agent + containerPort: 5004 + protocol: TCP + {{- if .Values.agent.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: agent + {{- toYaml .Values.agent.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.agent.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: agent + {{- toYaml .Values.agent.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.agent.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-agent-config + {{- if .Values.global.externalLLM.enabled }} + - secretRef: + name: {{ include "h2ogpt.fullname" . }}-external-llm-secret + {{- end }} + env: + {{- range $key, $value := .Values.agent.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if .Values.global.externalLLM.enabled }} + - name: H2OGPT_MODEL_LOCK + value: {{ toJson .Values.global.externalLLM.modelLock | quote }} + - name: H2OGPT_SCORE_MODEL + value: None + {{- end }} + {{- if .Values.global.visionModels.enabled }} + - name: H2OGPT_VISIBLE_VISION_MODELS + value: {{ .Values.global.visionModels.visibleModels | quote }} + - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE + value: {{ .Values.global.visionModels.rotateAlignResizeImage | quote }} + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-agent-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . 
}}-agent-volume + mountPath: /workspace/save + subPath: save + {{- if .Values.caCertificates }} + - name: ca-certificates + mountPath: /etc/ssl/certs/root-ca-bundle.crt + subPath: root-ca-bundle.crt + {{- end }} + {{ with .Values.agent.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: {{ include "h2ogpt.fullname" . }}-agent-volume + {{- if not .Values.agent.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-agent-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.agent.storage.size | quote }} + storageClassName: {{ .Values.agent.storage.class }} + {{- end }} + {{- if .Values.caCertificates }} + - name: ca-certificates + configMap: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + {{- end }} + {{- with .Values.agent.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-hpa.yaml b/helm/h2ogpt-chart/templates/agents-hpa.yaml new file mode 100644 index 000000000..5cf083bbb --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-hpa.yaml @@ -0,0 +1,33 @@ +{{- if .Values.agent.autoscaling.enabled | default false }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ .Release.Name }}-agent + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "h2ogpt.fullname" . 
}}-agent + minReplicas: {{ .Values.agent.autoscaling.minReplicas }} + maxReplicas: {{ .Values.agent.autoscaling.maxReplicas }} + metrics: + {{- if .Values.agent.autoscaling.targetCPU }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.agent.autoscaling.targetCPU }} + {{- end }} + {{- if .Values.agent.autoscaling.targetMemory }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.agent.autoscaling.targetMemory }} + {{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-pvc.yaml b/helm/h2ogpt-chart/templates/agents-pvc.yaml new file mode 100644 index 000000000..2ac48c921 --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-pvc.yaml @@ -0,0 +1,14 @@ +{{- if and (.Values.agent.enabled) (not .Values.agent.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-agent-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.agent.storage.class }} + resources: + requests: + storage: {{ .Values.agent.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-service.yaml b/helm/h2ogpt-chart/templates/agents-service.yaml new file mode 100644 index 000000000..6b0653555 --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-service.yaml @@ -0,0 +1,21 @@ +{{- if .Values.agent.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-agent + namespace: {{ include "h2ogpt.namespace" . | quote }} + + {{- with .Values.agent.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}}-agent + ports: + - name: agent + protocol: TCP + port: {{ .Values.agent.service.agentPort }} + targetPort: 5004 + type: {{ .Values.agent.service.type }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml new file mode 100644 index 000000000..84d2f4199 --- /dev/null +++ b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml @@ -0,0 +1,12 @@ +{{- if .Values.caCertificates}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: + root-ca-bundle.crt: | + {{ .Values.caCertificates | nindent 4 | trim }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/config-map.yaml b/helm/h2ogpt-chart/templates/config-map.yaml deleted file mode 100644 index 64aca5503..000000000 --- a/helm/h2ogpt-chart/templates/config-map.yaml +++ /dev/null @@ -1,69 +0,0 @@ - -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.h2ogpt.overrideConfig }} - {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.tgi.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.tgi.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.vllm.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - namespace: {{ include "h2ogpt.namespace" . 
| quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.vllm.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.lmdeploy.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.lmdeploy.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.caCertificates}} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-ca-certificates - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: - root-ca-bundle.crt: | - {{ .Values.caCertificates | nindent 4 | trim }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/deployment.yaml b/helm/h2ogpt-chart/templates/deployment.yaml deleted file mode 100644 index d89d8a3cb..000000000 --- a/helm/h2ogpt-chart/templates/deployment.yaml +++ /dev/null @@ -1,884 +0,0 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} - {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. 
Enable only one and try again" }} -{{- end }} -{{- if .Values.h2ogpt.stack.enabled }} - {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} - {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} - {{- end }} -{{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }} -spec: - {{- if not .Values.h2ogpt.autoscaling.enabled }} - replicas: {{ .Values.h2ogpt.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }} - {{- if .Values.h2ogpt.updateStrategy }} - strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.h2ogpt.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }} - {{- with .Values.h2ogpt.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.h2ogpt.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.h2ogpt.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.h2ogpt.podAffinity }} - podAntiAffinity: - {{- if .Values.h2ogpt.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.h2ogpt.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . 
}} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.h2ogpt.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.h2ogpt.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /dev/shm - subPath: shm - {{- end }} - - name: {{ include "h2ogpt.fullname" . 
}} - securityContext: - {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} - image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} - command: ["/bin/bash", "-c"] - {{- if .Values.h2ogpt.stack.enabled }} - args: - - > - while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' - http://localhost:5000/v1/models)" != "200" ]]; do - echo "Waiting for inference service to become ready... (2sec)" - sleep 2 - done - - python3 /workspace/generate.py - {{- end }} - {{- if not .Values.h2ogpt.stack.enabled }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} - args: - - > - python3 /workspace/generate.py - {{- end }} - {{- end }} - ports: - - name: http - containerPort: 7860 - protocol: TCP - - name: gpt - containerPort: 8888 - protocol: TCP - - name: openai - containerPort: 5000 - protocol: TCP - - name: function - containerPort: 5002 - protocol: TCP - - name: agent - containerPort: 5004 - protocol: TCP - {{- if .Values.h2ogpt.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.h2ogpt.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.h2ogpt.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-config - env: - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" - {{- end }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" - {{- end }} - {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "vllm:localhost:5000" - {{- end }} - {{- range $key, $value := .Values.h2ogpt.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: OPENAI_AZURE_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_AZURE_KEY - - name: OPENAI_AZURE_API_BASE - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_AZURE_API_BASE - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_API_KEY - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: REPLICATE_API_TOKEN - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: REPLICATE_API_TOKEN - {{- end }} - {{- if .Values.h2ogpt.externalLLM.enabled }} - - name: H2OGPT_MODEL_LOCK - value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }} - - name: H2OGPT_SCORE_MODEL - value: None - {{- end }} - {{- if .Values.h2ogpt.visionModels.enabled }} - - name: H2OGPT_VISIBLE_VISION_MODELS - value: {{ .Values.h2ogpt.visionModels.visibleModels | quote }} - - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE - value: {{ .Values.h2ogpt.visionModels.rotateAlignResizeImage | quote }} - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . 
}}-volume - mountPath: /workspace/save - subPath: save - {{- if .Values.caCertificates }} - - name: ca-certificates - mountPath: /etc/ssl/certs/root-ca-bundle.crt - subPath: root-ca-bundle.crt - {{- end }} - {{ with .Values.h2ogpt.extraVolumeMounts }} - {{- toYaml . | nindent 12 }} - {{- end }} - volumes: - - name: {{ include "h2ogpt.fullname" . }}-volume - {{- if not .Values.h2ogpt.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} - storageClassName: {{ .Values.h2ogpt.storage.class }} - {{- end }} - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - {{- if .Values.caCertificates }} - - name: ca-certificates - configMap: - name: {{ include "h2ogpt.fullname" . }}-ca-certificates - {{- end }} - {{- with .Values.h2ogpt.extraVolumes }} - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} ---- -{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-volume - namespace: {{ include "h2ogpt.namespace" . 
| quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.h2ogpt.storage.class | quote }} - storageClassName: {{ .Values.h2ogpt.storage.class }} - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} -{{- end }} - ---- -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference -spec: - {{- if not .Values.tgi.autoscaling.enabled }} - replicas: {{ .Values.tgi.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- if .Values.tgi.updateStrategy }} - strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.tgi.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- with .Values.tgi.podLabels }} - {{ toYaml . | nindent 6 }} - {{- end }} - spec: - {{- with .Values.tgi.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.tgi.podAffinity }} - podAntiAffinity: - {{- if .Values.tgi.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.tgi.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . 
}} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.tgi.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - securityContext: - {{- toYaml .Values.tgi.securityContext | nindent 12 }} - image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" - imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} - command: [] - args: -{{- range $arg := .Values.tgi.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 80 - protocol: TCP - {{- if .Values.tgi.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.tgi.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.tgi.resources | nindent 12 }} - env: - {{- range $key, $value := .Values.tgi.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - - secretRef: - name: {{ .Values.tgi.hfSecret }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /app/cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /data - subPath: data - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /dev/shm - subPath: shm - volumes: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- if not .Values.tgi.storage.useEphemeral}} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - {{- end }} -{{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.h2ogpt.storage.class | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled )}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference -spec: - {{- if not .Values.vllm.autoscaling.enabled }} - replicas: {{ .Values.vllm.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - {{- if .Values.vllm.updateStrategy }} - strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.vllm.podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - {{- with .Values.vllm.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.vllm.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.vllm.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.vllm.podAffinity }} - podAntiAffinity: - {{- if .Values.vllm.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.vllm.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.vllm.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.vllm.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.vllm.storage.class | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled )}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference -spec: - {{- if not .Values.lmdeploy.autoscaling.enabled }} - replicas: {{ .Values.lmdeploy.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- if .Values.lmdeploy.updateStrategy }} - strategy: {{- toYaml .Values.lmdeploy.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.lmdeploy.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- with .Values.lmdeploy.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.lmdeploy.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.tolerations }} - tolerations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.lmdeploy.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.lmdeploy.podAffinity }} - podAntiAffinity: - {{- if .Values.lmdeploy.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.lmdeploy.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.lmdeploy.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference - securityContext: - {{- toYaml .Values.lmdeploy.securityContext | nindent 12 }} - image: "{{ .Values.lmdeploy.image.repository }}:{{ .Values.lmdeploy.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.lmdeploy.image.pullPolicy }} - command: ["lmdeploy"] - args: - - "serve" - - "api_server" -{{- range $arg := .Values.lmdeploy.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 23333 - protocol: TCP - {{- if .Values.lmdeploy.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.lmdeploy.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.lmdeploy.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - - name: HF_HOME - value: "/workspace/.cache" - {{- range $key, $value := .Values.lmdeploy.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - {{- if not .Values.lmdeploy.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml new file mode 100644 index 000000000..044d9eeae --- /dev/null +++ b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml @@ -0,0 +1,14 @@ +{{- if and .Values.global.externalLLM.enabled (or .Values.agent.enabled .Values.h2ogpt.enabled) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "h2ogpt.fullname" . }}-external-llm-secret + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +type: Opaque +stringData: +{{- range $key, $value := .Values.global.externalLLM.secret }} + {{ $key }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml new file mode 100644 index 000000000..ceb8a18d9 --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml @@ -0,0 +1,26 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . 
}}-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := ( include "h2ogpt.config" . | fromYaml ) }} +{{- /* convert boolean value to cli compatiblity */}} + {{- if or ( eq "true" ($value | toString)) ( eq "false" ($value | toString)) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} +{{- end }} +{{- range $key, $value := ( .Values.h2ogpt.additionalConfig ) }} +{{- /* convert boolean value to cli compatiblity */}} + {{- if or ( eq "true" ($value | toString)) ( eq "false" ($value | toString)) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml new file mode 100644 index 000000000..4d1f74a70 --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -0,0 +1,197 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }} + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }} +spec: + replicas: {{ .Values.h2ogpt.replicaCount }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }} + {{- if .Values.h2ogpt.updateStrategy }} + strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.h2ogpt.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }} + {{- with .Values.h2ogpt.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.h2ogpt.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.h2ogpt.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.h2ogpt.podAffinity }} + podAntiAffinity: + {{- if .Values.h2ogpt.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.h2ogpt.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.h2ogpt.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.h2ogpt.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . }} + securityContext: + {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} + image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} + command: ["/bin/bash", "-c"] + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.h2ogpt.enabled (not .Values.vllm.enabled ) }} + args: + - > + python3 /workspace/generate.py + {{- end }} + ports: + - name: http + containerPort: 7860 + protocol: TCP + {{- if .Values.h2ogpt.openai.enabled }} + - name: openai + containerPort: 5000 + protocol: TCP + {{- end }} + - name: function + containerPort: 5002 + protocol: TCP + {{- if .Values.h2ogpt.agent.enabled }} + - name: agent + containerPort: 5004 + protocol: TCP + {{- end }} + {{- if .Values.h2ogpt.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.h2ogpt.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.h2ogpt.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-config + {{- if .Values.global.externalLLM.enabled }} + - secretRef: + name: {{ include "h2ogpt.fullname" . }}-external-llm-secret + {{- end }} + env: + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) }} + - name: h2ogpt_inference_server + value: "vllm:{{ include "h2ogpt.fullname" . 
}}-vllm-inference:{{ .Values.vllm.service.port }}" + {{- end }} + {{- range $key, $value := .Values.h2ogpt.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if .Values.global.externalLLM.enabled }} + - name: H2OGPT_MODEL_LOCK + value: {{ toJson .Values.global.externalLLM.modelLock | quote }} + - name: H2OGPT_SCORE_MODEL + value: None + {{- end }} + {{- if .Values.global.visionModels.enabled }} + - name: H2OGPT_VISIBLE_VISION_MODELS + value: {{ .Values.global.visionModels.visibleModels | quote }} + - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE + value: {{ .Values.global.visionModels.rotateAlignResizeImage | quote }} + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . }}-volume + mountPath: /workspace/save + subPath: save + {{- if .Values.caCertificates }} + - name: ca-certificates + mountPath: /etc/ssl/certs/root-ca-bundle.crt + subPath: root-ca-bundle.crt + {{- end }} + {{ with .Values.h2ogpt.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: {{ include "h2ogpt.fullname" . }}-volume + {{- if not .Values.h2ogpt.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} + storageClassName: {{ .Values.h2ogpt.storage.class }} + {{- end }} + {{- if .Values.caCertificates }} + - name: ca-certificates + configMap: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + {{- end }} + {{- with .Values.h2ogpt.extraVolumes }} + {{- toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml b/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml new file mode 100644 index 000000000..bd6e7141f --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml @@ -0,0 +1,14 @@ +{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.h2ogpt.storage.class }} + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml new file mode 100644 index 000000000..7e9f13bb9 --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -0,0 +1,37 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-web + namespace: {{ include "h2ogpt.namespace" . | quote }} + + {{- with .Values.h2ogpt.service.webServiceAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}} + ports: + - name: http + protocol: TCP + port: {{ .Values.h2ogpt.service.webPort }} + targetPort: 7860 + {{- if .Values.h2ogpt.openai.enabled }} + - name: openai + protocol: TCP + port: {{ .Values.h2ogpt.service.openaiPort }} + targetPort: 5000 + {{- end }} + - name: function + protocol: TCP + port: {{ .Values.h2ogpt.service.functionPort }} + targetPort: 5002 + {{- if .Values.h2ogpt.agent.enabled }} + - name: agent + protocol: TCP + port: {{ .Values.h2ogpt.service.agentPort }} + targetPort: 5004 + {{- end }} + type: {{ .Values.h2ogpt.service.type }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/service.yaml b/helm/h2ogpt-chart/templates/service.yaml deleted file mode 100644 index 8d3ddb73d..000000000 --- a/helm/h2ogpt-chart/templates/service.yaml +++ /dev/null @@ -1,97 +0,0 @@ -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-web - namespace: {{ include "h2ogpt.namespace" . | quote }} - - {{- with .Values.h2ogpt.service.webServiceAnnotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }} - ports: - - name: http - protocol: TCP - port: {{ .Values.h2ogpt.service.webPort }} - targetPort: 7860 - - name: openai - protocol: TCP - port: {{ .Values.h2ogpt.service.openaiPort }} - targetPort: 5000 - - name: function - protocol: TCP - port: {{ .Values.h2ogpt.service.functionPort }} - targetPort: 5002 - - name: agent - protocol: TCP - port: {{ .Values.h2ogpt.service.agentsPort }} - targetPort: 5004 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . 
}} - ports: - - protocol: TCP - port: {{ .Values.h2ogpt.service.gptPort }} - targetPort: 8888 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - ports: - - protocol: TCP - port: {{ .Values.tgi.service.port }} - targetPort: 80 - type: {{ .Values.tgi.service.type }} -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - ports: - - protocol: TCP - port: {{ .Values.vllm.service.port }} - targetPort: 5000 - type: {{ .Values.vllm.service.type }} -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - ports: - - protocol: TCP - port: {{ .Values.lmdeploy.service.port }} - targetPort: 23333 - type: {{ .Values.lmdeploy.service.type }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/validators.yaml b/helm/h2ogpt-chart/templates/validators.yaml new file mode 100644 index 000000000..49fb1532b --- /dev/null +++ b/helm/h2ogpt-chart/templates/validators.yaml @@ -0,0 +1,3 @@ +{{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agent.enabled) .Values.agent.enabled }} + {{- fail " Both agent and h2ogpt.agent cannot be enabled. 
Enable only one and try again" }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-configmap.yaml b/helm/h2ogpt-chart/templates/vllm-configmap.yaml new file mode 100644 index 000000000..66c187b3c --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.vllm.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.vllm.overrideConfig }} + {{ printf "%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-deployment.yaml b/helm/h2ogpt-chart/templates/vllm-deployment.yaml new file mode 100644 index 000000000..755a87aac --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-deployment.yaml @@ -0,0 +1,149 @@ +{{- if .Values.vllm.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference +spec: + replicas: {{ .Values.vllm.replicaCount }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference + {{- if .Values.vllm.updateStrategy }} + strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.vllm.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference + {{- with .Values.vllm.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.vllm.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.vllm.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.vllm.podAffinity }} + podAntiAffinity: + {{- if .Values.vllm.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.vllm.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.vllm.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.vllm.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference + securityContext: + {{- toYaml .Values.vllm.securityContext | nindent 12 }} + image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} + command: ["python3"] + args: + - "-m" + - "vllm.entrypoints.openai.api_server" + - "--port" + - "5000" + - "--host" + - "0.0.0.0" + - "--download-dir" + - "/workspace/.cache/huggingface/hub" +{{- range $arg := .Values.vllm.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 5000 + protocol: TCP + {{- if .Values.vllm.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.vllm.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.vllm.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + {{- range $key, $value := .Values.vllm.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: shm + mountPath: /dev/shm + volumes: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + - emptyDir: + medium: Memory + sizeLimit: 10.24Gi + name: shm +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-pvc.yaml b/helm/h2ogpt-chart/templates/vllm-pvc.yaml new file mode 100644 index 000000000..fe26f08ea --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-pvc.yaml @@ -0,0 +1,16 @@ +--- +{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.vllm.storage.class | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-service.yaml b/helm/h2ogpt-chart/templates/vllm-service.yaml new file mode 100644 index 000000000..980d998cd --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-service.yaml @@ -0,0 +1,15 @@ +{{- if .Values.vllm.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}}-vllm-inference + ports: + - protocol: TCP + port: {{ .Values.vllm.service.port }} + targetPort: 5000 + type: {{ .Values.vllm.service.type }} +{{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index b0e599bf4..7b7644dca 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -2,109 +2,101 @@ nameOverride: "" fullnameOverride: "" namespaceOverride: "" +global: + externalLLM: + enabled: false + # -- list of secrets for h2ogpt and agent env + secret: {} +# OPENAI_AZURE_KEY: "value" +# OPENAI_AZURE_API_BASE: "value" +# OPENAI_API_KEY: "value" +# REPLICATE_API_TOKEN: "value" + + modelLock: + + visionModels: + # -- Enable vision models + enabled: false + # -- Visible vision models, the vision model itself needs to be set via modelLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] + visibleModels: [ ] + rotateAlignResizeImage: false + h2ogpt: + # -- Enable h2oGPT enabled: true - stack: - # -- Run h2oGPT and vLLM on same pod. - enabled: false + # -- Enable agent + agent: + # -- Run agent with h2oGPT container + enabled: false + agent_workers: 5 + openai: + enabled: true + openai_workers: 5 replicaCount: 1 - imagePullSecrets: + imagePullSecrets: image: repository: gcr.io/vorvan/h2oai/h2ogpt-runtime - tag: + tag: pullPolicy: IfNotPresent initImage: repository: tag: pullPolicy: - - # extra volumes, for more certs, mount under /etc/ssl/more-certs + # -- Extra volumes, for more certs, mount under /etc/ssl/more-certs extraVolumes: [] + # -- Extra volume mounts extraVolumeMounts: [] - - podAffinity: # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. 
+ podAffinity: # hostname: # zone: - storage: size: 128Gi - class: + class: useEphemeral: true - - externalLLM: - enabled: false - secret: - modelLock: - - openAIAzure: - enabled: false - - openAI: - enabled: False - - replicate: - enabled: false - - visionModels: - enabled: false - # -- Visible vision models, the vision model itslef needs to be set via modeLock or base_model - # -- Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] - visibleModels: [] - rotateAlignResizeImage: false - -# -- Example configs to use when not using Model Lock and External LLM - # overrideConfig: - # base_model: h2oai/h2ogpt-4096-llama2-7b-chat - # use_safetensors: True - # prompt_type: llama2 - # save_dir: /workspace/save/ - # use_gpu_id: False - # score_model: None - # max_max_new_tokens: 2048 - # max_new_tokens: 1024 - - overrideConfig: - visible_login_tab: False - visible_system_tab: False - visible_models_tab: False - visible_hosts_tab: False - # change below to valid vision model or remove this entry - #visible_vision_models: "['OpenGVLab/InternVL-Chat-V1-5']" - rotate_align_resize_image: False - concurrency_count: 100 - top_k_docs_max_show: 100 - num_async: 10 - # change below to valid directory or remove this entry - #save_dir: "/docker_logs" - score_model: "None" - enable_tts: False - enable_stt: False - enable_transcriptions: False - embedding_gpu_id: "cpu" - hf_embedding_model: "fake" - openai_server: True - share: False - enforce_h2ogpt_api_key: True - enforce_h2ogpt_ui_key: False - # change to something secure for ui access to backend - #h2ogpt_api_keys: "['api_key_change_me']" - metadata_in_context: "" - # change or remove if using model hub - #use_auth_token: "hf_xxxxx" - # change below to first visible model or remove this entry - #visible_models: "['mistralai/Mistral-7B-Instruct-v0.3']" - # change so ui or api cannot access without this password - #admin_pass: "admin_password_change_me" + # -- Defaults configs are set internally with recommended values. 
Set values if you really need to change. + # -- Supported configs are commented. If you don't pass any value, keep {} + overrideConfig: {} +# verbose: +# heap_app_id: +# num_async: +# save_dir: +# score_model: +# share: +# enforce_h2ogpt_api_key: +# enforce_h2ogpt_ui_key: +# h2ogpt_api_keys: +# use_auth_token: +# visible_models: +# top_k_docs_max_show: +# admin_pass: +# function_server: +# function_server_workers: +# multiple_workers_gunicorn: +# llava_model: +# enable_llava: +# enable_tts: +# enable_stt: +# enable_transcriptions: +# asr_model: +# pre_load_embedding_model: +# pre_load_image_audio_models: +# cut_distance: +# hf_embedding_model: +# enable_captions: +# enable_doctr: +# embedding_gpu_id: + + # -- You can pass additional config here if overrideConfig does not have it. + additionalConfig: {} service: type: NodePort webPort: 80 openaiPort: 5000 functionPort: 5002 - agentsPort: 5004 - gptPort: 8888 + agentPort: 5004 webServiceAnnotations: {} updateStrategy: @@ -112,9 +104,9 @@ h2ogpt: podSecurityContext: runAsNonRoot: true - runAsUser: - runAsGroup: - fsGroup: + runAsUser: + runAsGroup: + fsGroup: securityContext: runAsNonRoot: true @@ -126,59 +118,133 @@ h2ogpt: type: RuntimeDefault resources: - nodeSelector: - tolerations: + requests: + memory: 32Gi + nvidia.com/gpu: 0 + limits: + memory: 64Gi + nvidia.com/gpu: 0 + # -- Node taints to tolerate by the h2ogpt pods. + tolerations: [] + # -- Node selector for the h2ogpt pods. 
+ nodeSelector: {} env: {} podAnnotations: {} podLabels: {} - autoscaling: {} -tgi: - enabled: false +agent: + # -- Enable agent, this must be `false` if `h2ogpt.agent.enabled` is `true` + enabled: true + agent_workers: 5 + autoscaling: + # Enable autoscaling (HPA) for agent + enabled: false + minReplicas: 1 + maxReplicas: 2 + targetMemory: 32Gi + targetCPU: 80 replicaCount: 1 - + imagePullSecrets: image: - repository: ghcr.io/huggingface/text-generation-inference - tag: 0.9.3 + repository: gcr.io/vorvan/h2oai/h2ogpt-runtime + tag: pullPolicy: IfNotPresent - + initImage: + repository: + tag: + pullPolicy: + # -- Extra volumes, for more certs, mount under /etc/ssl/more-certs + extraVolumes: [] + # -- Extra volume mounts + extraVolumeMounts: [] + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. - # hostname: - # zone: + # hostname: + # zone: storage: - size: 512Gi - class: + size: 128Gi + class: useEphemeral: true - - overrideConfig: - hfSecret: - containerArgs: + + # -- Defaults configs are set internally with recommended values. Set values if you really need to change. + # -- Supported configs are commented. If you don't pass any value, keep {} + overrideConfig: {} +# verbose: +# heap_app_id: +# num_async: +# save_dir: +# score_model: +# share: +# enforce_h2ogpt_api_key: +# enforce_h2ogpt_ui_key: +# h2ogpt_api_keys: +# use_auth_token: +# visible_models: +# top_k_docs_max_show: +# admin_pass: +# multiple_workers_gunicorn: +# llava_model: +# enable_llava: +# enable_tts: +# enable_stt: +# enable_transcriptions: +# asr_model: +# pre_load_embedding_model: +# pre_load_image_audio_models: +# cut_distance: +# hf_embedding_model: +# enable_captions: +# enable_doctr: +# embedding_gpu_id: + + # -- You can pass additional config here if overrideConfig does not have it. 
+ additionalConfig: {} service: - type: ClusterIP - port: 8080 + type: NodePort + agentPort: 5004 + annotations: {} updateStrategy: type: RollingUpdate podSecurityContext: + runAsNonRoot: true + runAsUser: + runAsGroup: + fsGroup: + securityContext: + runAsNonRoot: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault resources: - nodeSelector: - tolerations: + requests: + memory: 32Gi + nvidia.com/gpu: 1 + limits: + memory: 64Gi + nvidia.com/gpu: 1 + # -- Node taints to tolerate by the agent pods. + tolerations: [] + # -- Node selector for the agent pods. + nodeSelector: {} env: {} podAnnotations: {} podLabels: {} - autoscaling: {} vllm: + # -- Enable vllm enabled: false replicaCount: 1 @@ -186,9 +252,9 @@ vllm: repository: vllm/vllm-openai tag: latest pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. + # hostname: # zone: @@ -245,51 +311,7 @@ vllm: podAnnotations: {} podLabels: {} - autoscaling: {} - -lmdeploy: - enabled: false - replicaCount: 1 - - image: - repository: gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy - tag: - pullPolicy: IfNotPresent - podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. - # hostname: - # zone: - - storage: - size: 512Gi - class: - useEphemeral: true - - overrideConfig: - hfSecret: - containerArgs: - - "OpenGVLab/InternVL-Chat-V1-5" - - service: - type: ClusterIP - port: 23333 - - updateStrategy: - type: RollingUpdate - - podSecurityContext: - securityContext: - - resources: - nodeSelector: - tolerations: - - env: {} - - podAnnotations: {} - podLabels: {} - autoscaling: {} # -- CA certs caCertificates: ""