diff --git a/helm/h2ogpt-chart/Chart.yaml b/helm/h2ogpt-chart/Chart.yaml index d90a7d69e..5a597ed84 100644 --- a/helm/h2ogpt-chart/Chart.yaml +++ b/helm/h2ogpt-chart/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: h2ogpt -description: A Helm chart for h2ogpt +description: A Helm chart for h2oGPT # A chart can be either an 'application' or a 'library' chart. # @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0-288 +version: 0.2.1-1254 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: 0.1.0-288 +appVersion: 0.2.1-1254 diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md new file mode 100644 index 000000000..b4b6bc94b --- /dev/null +++ b/helm/h2ogpt-chart/README.md @@ -0,0 +1,148 @@ +# h2ogpt + +![Version: 0.2.1-1254](https://img.shields.io/badge/Version-0.2.1--1254-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.2.1-1254](https://img.shields.io/badge/AppVersion-0.2.1--1254-informational?style=flat-square) + +A Helm chart for h2oGPT + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| agent.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. 
| +| agent.agent_workers | int | `5` | | +| agent.autoscaling.enabled | bool | `false` | | +| agent.autoscaling.maxReplicas | int | `2` | | +| agent.autoscaling.minReplicas | int | `1` | | +| agent.autoscaling.targetCPU | int | `80` | | +| agent.autoscaling.targetMemory | string | `"32Gi"` | | +| agent.enabled | bool | `true` | Enable agent, this must be `false` if `h2ogpt.agent.enabled` is `true` | +| agent.env | object | `{}` | | +| agent.extraVolumeMounts | list | `[]` | Extra volume mounts | +| agent.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs | +| agent.image.pullPolicy | string | `"IfNotPresent"` | | +| agent.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` | | +| agent.image.tag | string | `nil` | | +| agent.imagePullSecrets | string | `nil` | | +| agent.initImage.pullPolicy | string | `nil` | | +| agent.initImage.repository | string | `nil` | | +| agent.initImage.tag | string | `nil` | | +| agent.nodeSelector | object | `{}` | Node selector for the agent pods. | +| agent.overrideConfig | object | `{}` | Supported configs are commented. If you don't pass any value, keep {} | +| agent.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| agent.podAnnotations | object | `{}` | | +| agent.podLabels | object | `{}` | | +| agent.podSecurityContext.fsGroup | string | `nil` | | +| agent.podSecurityContext.runAsGroup | string | `nil` | | +| agent.podSecurityContext.runAsNonRoot | bool | `true` | | +| agent.podSecurityContext.runAsUser | string | `nil` | | +| agent.replicaCount | int | `1` | | +| agent.resources.limits."nvidia.com/gpu" | int | `1` | | +| agent.resources.limits.memory | string | `"64Gi"` | | +| agent.resources.requests."nvidia.com/gpu" | int | `1` | | +| agent.resources.requests.memory | string | `"32Gi"` | | +| agent.securityContext.allowPrivilegeEscalation | bool | `false` | | +| agent.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| agent.securityContext.runAsNonRoot | bool | `true` | | +| agent.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | +| agent.service.agentPort | int | `5004` | | +| agent.service.annotations | object | `{}` | | +| agent.service.type | string | `"NodePort"` | | +| agent.storage.class | string | `nil` | | +| agent.storage.size | string | `"128Gi"` | | +| agent.storage.useEphemeral | bool | `true` | | +| agent.tolerations | list | `[]` | Node taints to tolerate by the agent pods. | +| agent.updateStrategy.type | string | `"RollingUpdate"` | | +| caCertificates | string | `""` | CA certs | +| fullnameOverride | string | `""` | | +| global.externalLLM.enabled | bool | `false` | | +| global.externalLLM.modelLock | string | `nil` | | +| global.externalLLM.secret | object | `{}` | list of secrets for h2ogpt and agent env | +| global.visionModels.enabled | bool | `false` | Enable vision models | +| global.visionModels.rotateAlignResizeImage | bool | `false` | | +| global.visionModels.visibleModels | list | `[]` | Visible vision models, the vision model itself needs to be set via modelLock or base_model. 
Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] | +| h2ogpt.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. | +| h2ogpt.agent | object | `{"agent_workers":5,"enabled":false}` | Enable agent | +| h2ogpt.agent.enabled | bool | `false` | Run agent with h2oGPT container | +| h2ogpt.enabled | bool | `true` | Enable h2oGPT | +| h2ogpt.env | object | `{}` | | +| h2ogpt.extraVolumeMounts | list | `[]` | Extra volume mounts | +| h2ogpt.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs | +| h2ogpt.image.pullPolicy | string | `"IfNotPresent"` | | +| h2ogpt.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` | | +| h2ogpt.image.tag | string | `nil` | | +| h2ogpt.imagePullSecrets | string | `nil` | | +| h2ogpt.initImage.pullPolicy | string | `nil` | | +| h2ogpt.initImage.repository | string | `nil` | | +| h2ogpt.initImage.tag | string | `nil` | | +| h2ogpt.nodeSelector | object | `{}` | Node selector for the h2ogpt pods. | +| h2ogpt.openai.enabled | bool | `true` | | +| h2ogpt.openai.openai_workers | int | `5` | | +| h2ogpt.overrideConfig | object | `{}` | Supported configs are commented. If you don't pass any value, keep {} | +| h2ogpt.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| +| h2ogpt.podAnnotations | object | `{}` | | +| h2ogpt.podLabels | object | `{}` | | +| h2ogpt.podSecurityContext.fsGroup | string | `nil` | | +| h2ogpt.podSecurityContext.runAsGroup | string | `nil` | | +| h2ogpt.podSecurityContext.runAsNonRoot | bool | `true` | | +| h2ogpt.podSecurityContext.runAsUser | string | `nil` | | +| h2ogpt.replicaCount | int | `1` | | +| h2ogpt.resources.limits."nvidia.com/gpu" | int | `0` | | +| h2ogpt.resources.limits.memory | string | `"64Gi"` | | +| h2ogpt.resources.requests."nvidia.com/gpu" | int | `0` | | +| h2ogpt.resources.requests.memory | string | `"32Gi"` | | +| h2ogpt.securityContext.allowPrivilegeEscalation | bool | `false` | | +| h2ogpt.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| h2ogpt.securityContext.runAsNonRoot | bool | `true` | | +| h2ogpt.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | +| h2ogpt.service.agentPort | int | `5004` | | +| h2ogpt.service.functionPort | int | `5002` | | +| h2ogpt.service.openaiPort | int | `5000` | | +| h2ogpt.service.type | string | `"NodePort"` | | +| h2ogpt.service.webPort | int | `80` | | +| h2ogpt.service.webServiceAnnotations | object | `{}` | | +| h2ogpt.storage.class | string | `nil` | | +| h2ogpt.storage.size | string | `"128Gi"` | | +| h2ogpt.storage.useEphemeral | bool | `true` | | +| h2ogpt.tolerations | list | `[]` | Node taints to tolerate by the h2ogpt pods. 
| +| h2ogpt.updateStrategy.type | string | `"RollingUpdate"` | | +| nameOverride | string | `""` | | +| namespaceOverride | string | `""` | | +| vllm.containerArgs[0] | string | `"--model"` | | +| vllm.containerArgs[1] | string | `"h2oai/h2ogpt-4096-llama2-7b-chat"` | | +| vllm.containerArgs[2] | string | `"--tokenizer"` | | +| vllm.containerArgs[3] | string | `"hf-internal-testing/llama-tokenizer"` | | +| vllm.containerArgs[4] | string | `"--tensor-parallel-size"` | | +| vllm.containerArgs[5] | int | `2` | | +| vllm.containerArgs[6] | string | `"--seed"` | | +| vllm.containerArgs[7] | int | `1234` | | +| vllm.containerArgs[8] | string | `"--trust-remote-code"` | | +| vllm.enabled | bool | `false` | Enable vllm | +| vllm.env.DO_NOT_TRACK | string | `"1"` | | +| vllm.env.VLLM_NO_USAGE_STATS | string | `"1"` | | +| vllm.image.pullPolicy | string | `"IfNotPresent"` | | +| vllm.image.repository | string | `"vllm/vllm-openai"` | | +| vllm.image.tag | string | `"latest"` | | +| vllm.imagePullSecrets | string | `nil` | | +| vllm.nodeSelector | string | `nil` | | +| vllm.overrideConfig | string | `nil` | | +| vllm.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| +| vllm.podAnnotations | object | `{}` | | +| vllm.podLabels | object | `{}` | | +| vllm.podSecurityContext.fsGroup | string | `nil` | | +| vllm.podSecurityContext.runAsGroup | string | `nil` | | +| vllm.podSecurityContext.runAsNonRoot | bool | `true` | | +| vllm.podSecurityContext.runAsUser | string | `nil` | | +| vllm.replicaCount | int | `1` | | +| vllm.resources | string | `nil` | | +| vllm.securityContext.allowPrivilegeEscalation | bool | `false` | | +| vllm.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| vllm.securityContext.runAsNonRoot | bool | `true` | | +| vllm.securityContext.seccompProfile | string | `nil` | | +| vllm.service.port | int | `5000` | | +| vllm.service.type | string | `"ClusterIP"` | | +| vllm.storage.class | string | `nil` | | +| vllm.storage.size | string | `"512Gi"` | | +| vllm.storage.useEphemeral | bool | `true` | | +| vllm.tolerations | string | `nil` | | +| vllm.updateStrategy.type | string | `"RollingUpdate"` | | + diff --git a/helm/h2ogpt-chart/templates/NOTES.txt b/helm/h2ogpt-chart/templates/NOTES.txt new file mode 100644 index 000000000..c32a7790f --- /dev/null +++ b/helm/h2ogpt-chart/templates/NOTES.txt @@ -0,0 +1,8 @@ +Thank you for installing {{ .Chart.Name }}. + +Your release is named {{ .Release.Name }}. 
+ +To learn more about the release, try: + + $ helm status {{ .Release.Name }} + $ helm get all {{ .Release.Name }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index a8352a4ad..61e2168dd 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -68,3 +68,131 @@ Create the name of the service account to use {{- default "default" .Values.serviceAccount.name }} {{- end }} {{- end }} + +{{/* +Config for h2oGPT +*/}} + +{{- define "h2ogpt.config" -}} +{{- with .Values.h2ogpt }} +verbose: {{ default "True" ( .overrideConfig.verbose | quote ) }} +{{- if .overrideConfig.heap_app_id }} +heap_app_id: {{ .overrideConfig.heap_app_id }} +{{- end }} +num_async: {{ default 10 .overrideConfig.num_async }} +save_dir: {{ default "/docker_logs" .overrideConfig.save_dir }} +score_model: {{ default "None" .overrideConfig.score_model }} +share: {{ default "False" (.overrideConfig.share | quote ) }} +enforce_h2ogpt_api_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_api_key | quote ) }} +enforce_h2ogpt_ui_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_ui_key | quote ) }} +{{- if .overrideConfig.h2ogpt_api_keys }} +h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} +{{- end }} +{{- if .overrideConfig.use_auth_token }} +use_auth_token: {{ .overrideConfig.use_auth_token }} +{{- end }} +visible_models: {{ default "['meta-llama/Meta-Llama-3.1-8B-Instruct']" .overrideConfig.visible_models }} +{{/*visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }}*/}} +top_k_docs_max_show: {{ default 100 .overrideConfig.top_k_docs_max_show }} +{{- if .overrideConfig.admin_pass }} +admin_pass: {{ .overrideConfig.admin_pass }} +{{- end }} +{{- if .openai.enabled }} +openai_server: "True" +openai_port: 5000 +openai_workers: {{ default 5 .openai.openai_workers }} +{{- end }} +{{- if .agent.enabled }} 
+agent_server: "True" +agent_port: 5004 +agent_workers: {{ .agent.agent_workers }} +{{- end }} +function_server: {{ default "True" ( .overrideConfig.function_server | quote ) }} +function_port: 5002 +function_server_workers: {{ default 1 .overrideConfig.function_server_workers }} +multiple_workers_gunicorn: {{ default "True" ( .overrideConfig.multiple_workers_gunicorn | quote ) }} +llava_model: {{ default "openai:mistralai/Pixtral-12B-2409" .overrideConfig.llava_model }} +enable_llava: {{ default "True" ( .overrideConfig.enable_llava | quote ) }} +{{- if ge (int (index .resources.requests "nvidia.com/gpu") ) (int 1) }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "True" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "True" ( .overrideConfig.enable_transcriptions | quote ) }} +asr_model: {{ default "distil-whisper/distil-large-v3" .overrideConfig.asr_model }} +pre_load_embedding_model: {{ default "True" (.overrideConfig.pre_load_embedding_model | quote ) }} +pre_load_image_audio_models: {{ default "True" ( .overrideConfig.pre_load_image_audio_models | quote ) }} +cut_distance: {{ default 10000 .overrideConfig.cut_distance }} +hf_embedding_model: {{ default "BAAI/bge-large-en-v1.5" .overrideConfig.hf_embedding_model }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "True" ( .overrideConfig.enable_doctr | quote ) }} +{{- else }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "False" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "False" ( .overrideConfig.enable_transcriptions | quote ) }} +embedding_gpu_id: {{ default "cpu" .overrideConfig.embedding_gpu_id }} +hf_embedding_model: {{ default "fake" .overrideConfig.hf_embedding_model }} +pre_load_embedding_model: {{ default "False" ( .overrideConfig.pre_load_embedding_model | quote ) }} 
+pre_load_image_audio_models: {{ default "False" ( .overrideConfig.pre_load_image_audio_models | quote ) }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "False" ( .overrideConfig.enable_doctr | quote ) }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Config for agent +*/}} + +{{- define "agent.config" -}} +{{- with .Values.agent }} +verbose: {{ default "True" ( .overrideConfig.verbose | quote ) }} +{{- if .overrideConfig.heap_app_id }} +heap_app_id: {{ .overrideConfig.heap_app_id }} +{{- end }} +num_async: {{ default 10 .overrideConfig.num_async }} +save_dir: {{ default "/docker_logs" .overrideConfig.save_dir }} +score_model: {{ default "None" .overrideConfig.score_model }} +share: {{ default "False" (.overrideConfig.share | quote ) }} +enforce_h2ogpt_api_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_api_key | quote ) }} +enforce_h2ogpt_ui_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_ui_key | quote ) }} +{{- if .overrideConfig.h2ogpt_api_keys }} +h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} +{{- end }} +{{- if .overrideConfig.use_auth_token }} +use_auth_token: {{ .overrideConfig.use_auth_token }} +{{- end }} +visible_models: {{ default "['meta-llama/Meta-Llama-3.1-8B-Instruct']" .overrideConfig.visible_models }} +{{/*visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }}*/}} +top_k_docs_max_show: {{ default 100 .overrideConfig.top_k_docs_max_show }} +{{- if .overrideConfig.admin_pass }} +admin_pass: {{ .overrideConfig.admin_pass }} +{{- end }} +agent_server: "True" +agent_port: 5004 +agent_workers: {{ default 5 .agent_workers }} +multiple_workers_gunicorn: {{ default "True" ( .overrideConfig.multiple_workers_gunicorn | quote ) }} +llava_model: {{ default "openai:mistralai/Pixtral-12B-2409" .overrideConfig.llava_model }} +enable_llava: {{ default "True" ( .overrideConfig.enable_llava | quote ) }} +{{- if ge (int (index 
.resources.requests "nvidia.com/gpu") ) (int 1) }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "True" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "True" ( .overrideConfig.enable_transcriptions | quote ) }} +asr_model: {{ default "distil-whisper/distil-large-v3" .overrideConfig.asr_model }} +pre_load_embedding_model: {{ default "True" (.overrideConfig.pre_load_embedding_model | quote ) }} +pre_load_image_audio_models: {{ default "True" ( .overrideConfig.pre_load_image_audio_models | quote ) }} +cut_distance: {{ default 10000 .overrideConfig.cut_distance }} +hf_embedding_model: {{ default "BAAI/bge-large-en-v1.5" .overrideConfig.hf_embedding_model }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "True" ( .overrideConfig.enable_doctr | quote ) }} +{{- else }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "False" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "False" ( .overrideConfig.enable_transcriptions | quote ) }} +embedding_gpu_id: {{ default "cpu" .overrideConfig.embedding_gpu_id }} +hf_embedding_model: {{ default "fake" .overrideConfig.hf_embedding_model }} +pre_load_embedding_model: {{ default "False" ( .overrideConfig.pre_load_embedding_model | quote ) }} +pre_load_image_audio_models: {{ default "False" ( .overrideConfig.pre_load_image_audio_models | quote ) }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "False" ( .overrideConfig.enable_doctr | quote ) }} +{{- end }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-configmap.yaml b/helm/h2ogpt-chart/templates/agents-configmap.yaml new file mode 100644 index 000000000..b6fa6e51e --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-configmap.yaml @@ -0,0 +1,26 @@ +{{- if .Values.agent.enabled 
}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-agent-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := ( include "agent.config" . | fromYaml ) }} +{{- /* convert boolean value to cli compatibility */}} + {{- if or ( eq "true" ( $value | toString )) ( eq "false" ( $value | toString )) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} +{{- end }} +{{- range $key, $value := ( .Values.agent.additionalConfig ) }} +{{- /* convert boolean value to cli compatibility */}} + {{- if or ( eq "true" ( $value | toString )) ( eq "false" ( $value | toString )) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml new file mode 100644 index 000000000..ac737a792 --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -0,0 +1,167 @@ +{{- if .Values.agent.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-agent + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }}-agent +spec: + replicas: {{ .Values.agent.replicaCount }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-agent + {{- if .Values.agent.updateStrategy }} + strategy: {{- toYaml .Values.agent.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.agent.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }}-agent + {{- with .Values.agent.podLabels }} + {{ toYaml . 
| nindent 8 }} + {{- end }} + spec: + {{- with .Values.agent.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.agent.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.agent.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.agent.podAffinity }} + podAntiAffinity: + {{- if .Values.agent.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.agent.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.agent.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.agent.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . 
}}-agent + securityContext: + {{- toYaml .Values.agent.securityContext | nindent 12 }} + image: "{{ .Values.agent.image.repository }}:{{ .Values.agent.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.agent.image.pullPolicy }} + command: ["/bin/bash", "-c"] + args: + - > + python3 /workspace/generate.py + ports: + - name: agent + containerPort: 5004 + protocol: TCP + {{- if .Values.agent.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: agent + {{- toYaml .Values.agent.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.agent.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: agent + {{- toYaml .Values.agent.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.agent.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-agent-config + {{- if .Values.global.externalLLM.enabled }} + - secretRef: + name: {{ include "h2ogpt.fullname" . }}-external-llm-secret + {{- end }} + env: + {{- range $key, $value := .Values.agent.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if .Values.global.externalLLM.enabled }} + - name: H2OGPT_MODEL_LOCK + value: {{ toJson .Values.global.externalLLM.modelLock | quote }} + - name: H2OGPT_SCORE_MODEL + value: None + {{- end }} + {{- if .Values.global.visionModels.enabled }} + - name: H2OGPT_VISIBLE_VISION_MODELS + value: {{ .Values.global.visionModels.visibleModels | quote }} + - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE + value: {{ .Values.global.visionModels.rotateAlignResizeImage | quote }} + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-agent-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . 
}}-agent-volume + mountPath: /workspace/save + subPath: save + {{- if .Values.caCertificates }} + - name: ca-certificates + mountPath: /etc/ssl/certs/root-ca-bundle.crt + subPath: root-ca-bundle.crt + {{- end }} + {{ with .Values.agent.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: {{ include "h2ogpt.fullname" . }}-agent-volume + {{- if not .Values.agent.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-agent-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.agent.storage.size | quote }} + storageClassName: {{ .Values.agent.storage.class }} + {{- end }} + {{- if .Values.caCertificates }} + - name: ca-certificates + configMap: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + {{- end }} + {{- with .Values.agent.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-hpa.yaml b/helm/h2ogpt-chart/templates/agents-hpa.yaml new file mode 100644 index 000000000..5cf083bbb --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-hpa.yaml @@ -0,0 +1,33 @@ +{{- if .Values.agent.autoscaling.enabled | default false }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ .Release.Name }}-agent + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "h2ogpt.fullname" . 
}}-agent + minReplicas: {{ .Values.agent.autoscaling.minReplicas }} + maxReplicas: {{ .Values.agent.autoscaling.maxReplicas }} + metrics: + {{- if .Values.agent.autoscaling.targetCPU }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.agent.autoscaling.targetCPU }} + {{- end }} + {{- if .Values.agent.autoscaling.targetMemory }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.agent.autoscaling.targetMemory }} + {{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-pvc.yaml b/helm/h2ogpt-chart/templates/agents-pvc.yaml new file mode 100644 index 000000000..2ac48c921 --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-pvc.yaml @@ -0,0 +1,14 @@ +{{- if and (.Values.agent.enabled) (not .Values.agent.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-agent-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.agent.storage.class }} + resources: + requests: + storage: {{ .Values.agent.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-service.yaml b/helm/h2ogpt-chart/templates/agents-service.yaml new file mode 100644 index 000000000..6b0653555 --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-service.yaml @@ -0,0 +1,21 @@ +{{- if .Values.agent.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-agent + namespace: {{ include "h2ogpt.namespace" . | quote }} + + {{- with .Values.agent.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}}-agent + ports: + - name: agent + protocol: TCP + port: {{ .Values.agent.service.agentPort }} + targetPort: 5004 + type: {{ .Values.agent.service.type }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml new file mode 100644 index 000000000..84d2f4199 --- /dev/null +++ b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml @@ -0,0 +1,12 @@ +{{- if .Values.caCertificates}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: + root-ca-bundle.crt: | + {{ .Values.caCertificates | nindent 4 | trim }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/config-map.yaml b/helm/h2ogpt-chart/templates/config-map.yaml deleted file mode 100644 index 64aca5503..000000000 --- a/helm/h2ogpt-chart/templates/config-map.yaml +++ /dev/null @@ -1,69 +0,0 @@ - -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.h2ogpt.overrideConfig }} - {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.tgi.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.tgi.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.vllm.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - namespace: {{ include "h2ogpt.namespace" . 
| quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.vllm.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.lmdeploy.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.lmdeploy.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.caCertificates}} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-ca-certificates - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: - root-ca-bundle.crt: | - {{ .Values.caCertificates | nindent 4 | trim }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/deployment.yaml b/helm/h2ogpt-chart/templates/deployment.yaml deleted file mode 100644 index d89d8a3cb..000000000 --- a/helm/h2ogpt-chart/templates/deployment.yaml +++ /dev/null @@ -1,884 +0,0 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} - {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. 
Enable only one and try again" }} -{{- end }} -{{- if .Values.h2ogpt.stack.enabled }} - {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} - {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} - {{- end }} -{{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }} -spec: - {{- if not .Values.h2ogpt.autoscaling.enabled }} - replicas: {{ .Values.h2ogpt.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }} - {{- if .Values.h2ogpt.updateStrategy }} - strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.h2ogpt.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }} - {{- with .Values.h2ogpt.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.h2ogpt.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.h2ogpt.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.h2ogpt.podAffinity }} - podAntiAffinity: - {{- if .Values.h2ogpt.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.h2ogpt.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . 
}} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.h2ogpt.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.h2ogpt.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /dev/shm - subPath: shm - {{- end }} - - name: {{ include "h2ogpt.fullname" . 
}} - securityContext: - {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} - image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} - command: ["/bin/bash", "-c"] - {{- if .Values.h2ogpt.stack.enabled }} - args: - - > - while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' - http://localhost:5000/v1/models)" != "200" ]]; do - echo "Waiting for inference service to become ready... (2sec)" - sleep 2 - done - - python3 /workspace/generate.py - {{- end }} - {{- if not .Values.h2ogpt.stack.enabled }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} - args: - - > - python3 /workspace/generate.py - {{- end }} - {{- end }} - ports: - - name: http - containerPort: 7860 - protocol: TCP - - name: gpt - containerPort: 8888 - protocol: TCP - - name: openai - containerPort: 5000 - protocol: TCP - - name: function - containerPort: 5002 - protocol: TCP - - name: agent - containerPort: 5004 - protocol: TCP - {{- if .Values.h2ogpt.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.h2ogpt.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.h2ogpt.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-config - env: - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" - {{- end }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" - {{- end }} - {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "vllm:localhost:5000" - {{- end }} - {{- range $key, $value := .Values.h2ogpt.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: OPENAI_AZURE_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_AZURE_KEY - - name: OPENAI_AZURE_API_BASE - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_AZURE_API_BASE - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_API_KEY - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: REPLICATE_API_TOKEN - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: REPLICATE_API_TOKEN - {{- end }} - {{- if .Values.h2ogpt.externalLLM.enabled }} - - name: H2OGPT_MODEL_LOCK - value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }} - - name: H2OGPT_SCORE_MODEL - value: None - {{- end }} - {{- if .Values.h2ogpt.visionModels.enabled }} - - name: H2OGPT_VISIBLE_VISION_MODELS - value: {{ .Values.h2ogpt.visionModels.visibleModels | quote }} - - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE - value: {{ .Values.h2ogpt.visionModels.rotateAlignResizeImage | quote }} - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . 
}}-volume - mountPath: /workspace/save - subPath: save - {{- if .Values.caCertificates }} - - name: ca-certificates - mountPath: /etc/ssl/certs/root-ca-bundle.crt - subPath: root-ca-bundle.crt - {{- end }} - {{ with .Values.h2ogpt.extraVolumeMounts }} - {{- toYaml . | nindent 12 }} - {{- end }} - volumes: - - name: {{ include "h2ogpt.fullname" . }}-volume - {{- if not .Values.h2ogpt.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} - storageClassName: {{ .Values.h2ogpt.storage.class }} - {{- end }} - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - {{- if .Values.caCertificates }} - - name: ca-certificates - configMap: - name: {{ include "h2ogpt.fullname" . }}-ca-certificates - {{- end }} - {{- with .Values.h2ogpt.extraVolumes }} - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} ---- -{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-volume - namespace: {{ include "h2ogpt.namespace" . 
| quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.h2ogpt.storage.class | quote }} - storageClassName: {{ .Values.h2ogpt.storage.class }} - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} -{{- end }} - ---- -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference -spec: - {{- if not .Values.tgi.autoscaling.enabled }} - replicas: {{ .Values.tgi.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- if .Values.tgi.updateStrategy }} - strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.tgi.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- with .Values.tgi.podLabels }} - {{ toYaml . | nindent 6 }} - {{- end }} - spec: - {{- with .Values.tgi.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.tgi.podAffinity }} - podAntiAffinity: - {{- if .Values.tgi.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.tgi.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . 
}} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.tgi.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - securityContext: - {{- toYaml .Values.tgi.securityContext | nindent 12 }} - image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" - imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} - command: [] - args: -{{- range $arg := .Values.tgi.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 80 - protocol: TCP - {{- if .Values.tgi.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.tgi.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.tgi.resources | nindent 12 }} - env: - {{- range $key, $value := .Values.tgi.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - - secretRef: - name: {{ .Values.tgi.hfSecret }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /app/cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /data - subPath: data - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /dev/shm - subPath: shm - volumes: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- if not .Values.tgi.storage.useEphemeral}} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - {{- end }} -{{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.h2ogpt.storage.class | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled )}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference -spec: - {{- if not .Values.vllm.autoscaling.enabled }} - replicas: {{ .Values.vllm.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - {{- if .Values.vllm.updateStrategy }} - strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.vllm.podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - {{- with .Values.vllm.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.vllm.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.vllm.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.vllm.podAffinity }} - podAntiAffinity: - {{- if .Values.vllm.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.vllm.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.vllm.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.vllm.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.vllm.storage.class | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled )}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference -spec: - {{- if not .Values.lmdeploy.autoscaling.enabled }} - replicas: {{ .Values.lmdeploy.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- if .Values.lmdeploy.updateStrategy }} - strategy: {{- toYaml .Values.lmdeploy.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.lmdeploy.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- with .Values.lmdeploy.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.lmdeploy.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.tolerations }} - tolerations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.lmdeploy.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.lmdeploy.podAffinity }} - podAntiAffinity: - {{- if .Values.lmdeploy.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.lmdeploy.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.lmdeploy.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference - securityContext: - {{- toYaml .Values.lmdeploy.securityContext | nindent 12 }} - image: "{{ .Values.lmdeploy.image.repository }}:{{ .Values.lmdeploy.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.lmdeploy.image.pullPolicy }} - command: ["lmdeploy"] - args: - - "serve" - - "api_server" -{{- range $arg := .Values.lmdeploy.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 23333 - protocol: TCP - {{- if .Values.lmdeploy.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.lmdeploy.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.lmdeploy.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - - name: HF_HOME - value: "/workspace/.cache" - {{- range $key, $value := .Values.lmdeploy.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - {{- if not .Values.lmdeploy.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml new file mode 100644 index 000000000..044d9eeae --- /dev/null +++ b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml @@ -0,0 +1,14 @@ +{{- if and .Values.global.externalLLM.enabled (or .Values.agent.enabled .Values.h2ogpt.enabled) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "h2ogpt.fullname" . }}-external-llm-secret + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +type: Opaque +stringData: +{{- range $key, $value := .Values.global.externalLLM.secret }} + {{ $key }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml new file mode 100644 index 000000000..ceb8a18d9 --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml @@ -0,0 +1,26 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . 
}}-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := ( include "h2ogpt.config" . | fromYaml ) }} +{{- /* convert boolean value to cli compatiblity */}} + {{- if or ( eq "true" ($value | toString)) ( eq "false" ($value | toString)) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} +{{- end }} +{{- range $key, $value := ( .Values.h2ogpt.additionalConfig ) }} +{{- /* convert boolean value to cli compatiblity */}} + {{- if or ( eq "true" ($value | toString)) ( eq "false" ($value | toString)) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml new file mode 100644 index 000000000..4d1f74a70 --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -0,0 +1,197 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }} + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }} +spec: + replicas: {{ .Values.h2ogpt.replicaCount }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }} + {{- if .Values.h2ogpt.updateStrategy }} + strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.h2ogpt.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }} + {{- with .Values.h2ogpt.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.h2ogpt.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.h2ogpt.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.h2ogpt.podAffinity }} + podAntiAffinity: + {{- if .Values.h2ogpt.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.h2ogpt.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.h2ogpt.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.h2ogpt.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . }} + securityContext: + {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} + image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} + command: ["/bin/bash", "-c"] + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.h2ogpt.enabled (not .Values.vllm.enabled ) }} + args: + - > + python3 /workspace/generate.py + {{- end }} + ports: + - name: http + containerPort: 7860 + protocol: TCP + {{- if .Values.h2ogpt.openai.enabled }} + - name: openai + containerPort: 5000 + protocol: TCP + {{- end }} + - name: function + containerPort: 5002 + protocol: TCP + {{- if .Values.h2ogpt.agent.enabled }} + - name: agent + containerPort: 5004 + protocol: TCP + {{- end }} + {{- if .Values.h2ogpt.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.h2ogpt.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.h2ogpt.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-config + {{- if .Values.global.externalLLM.enabled }} + - secretRef: + name: {{ include "h2ogpt.fullname" . }}-external-llm-secret + {{- end }} + env: + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) }} + - name: h2ogpt_inference_server + value: "vllm:{{ include "h2ogpt.fullname" . 
}}-vllm-inference:{{ .Values.vllm.service.port }}" + {{- end }} + {{- range $key, $value := .Values.h2ogpt.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if .Values.global.externalLLM.enabled }} + - name: H2OGPT_MODEL_LOCK + value: {{ toJson .Values.global.externalLLM.modelLock | quote }} + - name: H2OGPT_SCORE_MODEL + value: None + {{- end }} + {{- if .Values.global.visionModels.enabled }} + - name: H2OGPT_VISIBLE_VISION_MODELS + value: {{ .Values.global.visionModels.visibleModels | quote }} + - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE + value: {{ .Values.global.visionModels.rotateAlignResizeImage | quote }} + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . }}-volume + mountPath: /workspace/save + subPath: save + {{- if .Values.caCertificates }} + - name: ca-certificates + mountPath: /etc/ssl/certs/root-ca-bundle.crt + subPath: root-ca-bundle.crt + {{- end }} + {{ with .Values.h2ogpt.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: {{ include "h2ogpt.fullname" . }}-volume + {{- if not .Values.h2ogpt.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} + storageClassName: {{ .Values.h2ogpt.storage.class }} + {{- end }} + {{- if .Values.caCertificates }} + - name: ca-certificates + configMap: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + {{- end }} + {{- with .Values.h2ogpt.extraVolumes }} + {{- toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml b/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml new file mode 100644 index 000000000..bd6e7141f --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml @@ -0,0 +1,14 @@ +{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.h2ogpt.storage.class }} + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml new file mode 100644 index 000000000..7e9f13bb9 --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -0,0 +1,37 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-web + namespace: {{ include "h2ogpt.namespace" . | quote }} + + {{- with .Values.h2ogpt.service.webServiceAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}} + ports: + - name: http + protocol: TCP + port: {{ .Values.h2ogpt.service.webPort }} + targetPort: 7860 + {{- if .Values.h2ogpt.openai.enabled }} + - name: openai + protocol: TCP + port: {{ .Values.h2ogpt.service.openaiPort }} + targetPort: 5000 + {{- end }} + - name: function + protocol: TCP + port: {{ .Values.h2ogpt.service.functionPort }} + targetPort: 5002 + {{- if .Values.h2ogpt.agent.enabled }} + - name: agent + protocol: TCP + port: {{ .Values.h2ogpt.service.agentPort }} + targetPort: 5004 + {{- end }} + type: {{ .Values.h2ogpt.service.type }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/service.yaml b/helm/h2ogpt-chart/templates/service.yaml deleted file mode 100644 index 8d3ddb73d..000000000 --- a/helm/h2ogpt-chart/templates/service.yaml +++ /dev/null @@ -1,97 +0,0 @@ -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-web - namespace: {{ include "h2ogpt.namespace" . | quote }} - - {{- with .Values.h2ogpt.service.webServiceAnnotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }} - ports: - - name: http - protocol: TCP - port: {{ .Values.h2ogpt.service.webPort }} - targetPort: 7860 - - name: openai - protocol: TCP - port: {{ .Values.h2ogpt.service.openaiPort }} - targetPort: 5000 - - name: function - protocol: TCP - port: {{ .Values.h2ogpt.service.functionPort }} - targetPort: 5002 - - name: agent - protocol: TCP - port: {{ .Values.h2ogpt.service.agentsPort }} - targetPort: 5004 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . 
}} - ports: - - protocol: TCP - port: {{ .Values.h2ogpt.service.gptPort }} - targetPort: 8888 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - ports: - - protocol: TCP - port: {{ .Values.tgi.service.port }} - targetPort: 80 - type: {{ .Values.tgi.service.type }} -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - ports: - - protocol: TCP - port: {{ .Values.vllm.service.port }} - targetPort: 5000 - type: {{ .Values.vllm.service.type }} -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - ports: - - protocol: TCP - port: {{ .Values.lmdeploy.service.port }} - targetPort: 23333 - type: {{ .Values.lmdeploy.service.type }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/validators.yaml b/helm/h2ogpt-chart/templates/validators.yaml new file mode 100644 index 000000000..49fb1532b --- /dev/null +++ b/helm/h2ogpt-chart/templates/validators.yaml @@ -0,0 +1,3 @@ +{{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agent.enabled) .Values.agent.enabled }} + {{- fail " Both agent and h2ogpt.agent cannot be enabled. 
Enable only one and try again" }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-configmap.yaml b/helm/h2ogpt-chart/templates/vllm-configmap.yaml new file mode 100644 index 000000000..66c187b3c --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.vllm.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.vllm.overrideConfig }} + {{ printf "%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-deployment.yaml b/helm/h2ogpt-chart/templates/vllm-deployment.yaml new file mode 100644 index 000000000..755a87aac --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-deployment.yaml @@ -0,0 +1,149 @@ +{{- if .Values.vllm.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference +spec: + replicas: {{ .Values.vllm.replicaCount }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference + {{- if .Values.vllm.updateStrategy }} + strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.vllm.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference + {{- with .Values.vllm.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.vllm.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.vllm.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.vllm.podAffinity }} + podAntiAffinity: + {{- if .Values.vllm.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.vllm.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.vllm.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.vllm.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference + securityContext: + {{- toYaml .Values.vllm.securityContext | nindent 12 }} + image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} + command: ["python3"] + args: + - "-m" + - "vllm.entrypoints.openai.api_server" + - "--port" + - "5000" + - "--host" + - "0.0.0.0" + - "--download-dir" + - "/workspace/.cache/huggingface/hub" +{{- range $arg := .Values.vllm.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 5000 + protocol: TCP + {{- if .Values.vllm.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.vllm.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.vllm.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + {{- range $key, $value := .Values.vllm.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: shm + mountPath: /dev/shm + volumes: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + - emptyDir: + medium: Memory + sizeLimit: 10.24Gi + name: shm +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-pvc.yaml b/helm/h2ogpt-chart/templates/vllm-pvc.yaml new file mode 100644 index 000000000..fe26f08ea --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-pvc.yaml @@ -0,0 +1,16 @@ +--- +{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.vllm.storage.class | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-service.yaml b/helm/h2ogpt-chart/templates/vllm-service.yaml new file mode 100644 index 000000000..980d998cd --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-service.yaml @@ -0,0 +1,15 @@ +{{- if .Values.vllm.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}}-vllm-inference + ports: + - protocol: TCP + port: {{ .Values.vllm.service.port }} + targetPort: 5000 + type: {{ .Values.vllm.service.type }} +{{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index b0e599bf4..7b7644dca 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -2,109 +2,101 @@ nameOverride: "" fullnameOverride: "" namespaceOverride: "" +global: + externalLLM: + enabled: false + # -- list of secrets for h2ogpt and agent env + secret: {} +# OPENAI_AZURE_KEY: "value" +# OPENAI_AZURE_API_BASE: "value" +# OPENAI_API_KEY: "value" +# REPLICATE_API_TOKEN: "value" + + modelLock: + + visionModels: + # -- Enable vision models + enabled: false + # -- Visible vision models, the vision model itself needs to be set via modelLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] + visibleModels: [ ] + rotateAlignResizeImage: false + h2ogpt: + # -- Enable h2oGPT enabled: true - stack: - # -- Run h2oGPT and vLLM on same pod. - enabled: false + # -- Enable agent + agent: + # -- Run agent with h2oGPT container + enabled: false + agent_workers: 5 + openai: + enabled: true + openai_workers: 5 replicaCount: 1 - imagePullSecrets: + imagePullSecrets: image: repository: gcr.io/vorvan/h2oai/h2ogpt-runtime - tag: + tag: pullPolicy: IfNotPresent initImage: repository: tag: pullPolicy: - - # extra volumes, for more certs, mount under /etc/ssl/more-certs + # -- Extra volumes, for more certs, mount under /etc/ssl/more-certs extraVolumes: [] + # -- Extra volume mounts extraVolumeMounts: [] - - podAffinity: # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. 
+ podAffinity: # hostname: # zone: - storage: size: 128Gi - class: + class: useEphemeral: true - - externalLLM: - enabled: false - secret: - modelLock: - - openAIAzure: - enabled: false - - openAI: - enabled: False - - replicate: - enabled: false - - visionModels: - enabled: false - # -- Visible vision models, the vision model itslef needs to be set via modeLock or base_model - # -- Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] - visibleModels: [] - rotateAlignResizeImage: false - -# -- Example configs to use when not using Model Lock and External LLM - # overrideConfig: - # base_model: h2oai/h2ogpt-4096-llama2-7b-chat - # use_safetensors: True - # prompt_type: llama2 - # save_dir: /workspace/save/ - # use_gpu_id: False - # score_model: None - # max_max_new_tokens: 2048 - # max_new_tokens: 1024 - - overrideConfig: - visible_login_tab: False - visible_system_tab: False - visible_models_tab: False - visible_hosts_tab: False - # change below to valid vision model or remove this entry - #visible_vision_models: "['OpenGVLab/InternVL-Chat-V1-5']" - rotate_align_resize_image: False - concurrency_count: 100 - top_k_docs_max_show: 100 - num_async: 10 - # change below to valid directory or remove this entry - #save_dir: "/docker_logs" - score_model: "None" - enable_tts: False - enable_stt: False - enable_transcriptions: False - embedding_gpu_id: "cpu" - hf_embedding_model: "fake" - openai_server: True - share: False - enforce_h2ogpt_api_key: True - enforce_h2ogpt_ui_key: False - # change to something secure for ui access to backend - #h2ogpt_api_keys: "['api_key_change_me']" - metadata_in_context: "" - # change or remove if using model hub - #use_auth_token: "hf_xxxxx" - # change below to first visible model or remove this entry - #visible_models: "['mistralai/Mistral-7B-Instruct-v0.3']" - # change so ui or api cannot access without this password - #admin_pass: "admin_password_change_me" + # -- Defaults configs are set internally with recommended values. 
Set values if you really need to change. + # -- Supported configs are commented. If you don't pass any value, keep {} + overrideConfig: {} +# verbose: +# heap_app_id: +# num_async: +# save_dir: +# score_model: +# share: +# enforce_h2ogpt_api_key: +# enforce_h2ogpt_ui_key: +# h2ogpt_api_keys: +# use_auth_token: +# visible_models: +# top_k_docs_max_show: +# admin_pass: +# function_server: +# function_server_workers: +# multiple_workers_gunicorn: +# llava_model: +# enable_llava: +# enable_tts: +# enable_stt: +# enable_transcriptions: +# asr_model: +# pre_load_embedding_model: +# pre_load_image_audio_models: +# cut_distance: +# hf_embedding_model: +# enable_captions: +# enable_doctr: +# embedding_gpu_id: + + # -- You can pass additional config here if overrideConfig does not have it. + additionalConfig: {} service: type: NodePort webPort: 80 openaiPort: 5000 functionPort: 5002 - agentsPort: 5004 - gptPort: 8888 + agentPort: 5004 webServiceAnnotations: {} updateStrategy: @@ -112,9 +104,9 @@ h2ogpt: podSecurityContext: runAsNonRoot: true - runAsUser: - runAsGroup: - fsGroup: + runAsUser: + runAsGroup: + fsGroup: securityContext: runAsNonRoot: true @@ -126,59 +118,133 @@ h2ogpt: type: RuntimeDefault resources: - nodeSelector: - tolerations: + requests: + memory: 32Gi + nvidia.com/gpu: 0 + limits: + memory: 64Gi + nvidia.com/gpu: 0 + # -- Node taints to tolerate by the h2ogpt pods. + tolerations: [] + # -- Node selector for the h2ogpt pods. 
+ nodeSelector: {} env: {} podAnnotations: {} podLabels: {} - autoscaling: {} -tgi: - enabled: false +agent: + # -- Enable agent, this must be `false` if `h2ogpt.agent.enabled` is `true` + enabled: true + agent_workers: 5 + autoscaling: + # Enable autoscaling (HPA) for agent + enabled: false + minReplicas: 1 + maxReplicas: 2 + targetMemory: 32Gi + targetCPU: 80 replicaCount: 1 - + imagePullSecrets: image: - repository: ghcr.io/huggingface/text-generation-inference - tag: 0.9.3 + repository: gcr.io/vorvan/h2oai/h2ogpt-runtime + tag: pullPolicy: IfNotPresent - + initImage: + repository: + tag: + pullPolicy: + # -- Extra volumes, for more certs, mount under /etc/ssl/more-certs + extraVolumes: [] + # -- Extra volume mounts + extraVolumeMounts: [] + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. - # hostname: - # zone: + # hostname: + # zone: storage: - size: 512Gi - class: + size: 128Gi + class: useEphemeral: true - - overrideConfig: - hfSecret: - containerArgs: + + # -- Defaults configs are set internally with recommended values. Set values if you really need to change. + # -- Supported configs are commented. If you don't pass any value, keep {} + overrideConfig: {} +# verbose: +# heap_app_id: +# num_async: +# save_dir: +# score_model: +# share: +# enforce_h2ogpt_api_key: +# enforce_h2ogpt_ui_key: +# h2ogpt_api_keys: +# use_auth_token: +# visible_models: +# top_k_docs_max_show: +# admin_pass: +# multiple_workers_gunicorn: +# llava_model: +# enable_llava: +# enable_tts: +# enable_stt: +# enable_transcriptions: +# asr_model: +# pre_load_embedding_model: +# pre_load_image_audio_models: +# cut_distance: +# hf_embedding_model: +# enable_captions: +# enable_doctr: +# embedding_gpu_id: + + # -- You can pass additional config here if overrideConfig does not have it. 
+ additionalConfig: {} service: - type: ClusterIP - port: 8080 + type: NodePort + agentPort: 5004 + annotations: {} updateStrategy: type: RollingUpdate podSecurityContext: + runAsNonRoot: true + runAsUser: + runAsGroup: + fsGroup: + securityContext: + runAsNonRoot: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault resources: - nodeSelector: - tolerations: + requests: + memory: 32Gi + nvidia.com/gpu: 1 + limits: + memory: 64Gi + nvidia.com/gpu: 1 + # -- Node taints to tolerate by the agent pods. + tolerations: [] + # -- Node selector for the agent pods. + nodeSelector: {} env: {} podAnnotations: {} podLabels: {} - autoscaling: {} vllm: + # -- Enable vllm enabled: false replicaCount: 1 @@ -186,9 +252,9 @@ vllm: repository: vllm/vllm-openai tag: latest pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. + # hostname: # zone: @@ -245,51 +311,7 @@ vllm: podAnnotations: {} podLabels: {} - autoscaling: {} - -lmdeploy: - enabled: false - replicaCount: 1 - - image: - repository: gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy - tag: - pullPolicy: IfNotPresent - podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. - # hostname: - # zone: - - storage: - size: 512Gi - class: - useEphemeral: true - - overrideConfig: - hfSecret: - containerArgs: - - "OpenGVLab/InternVL-Chat-V1-5" - - service: - type: ClusterIP - port: 23333 - - updateStrategy: - type: RollingUpdate - - podSecurityContext: - securityContext: - - resources: - nodeSelector: - tolerations: - - env: {} - - podAnnotations: {} - podLabels: {} - autoscaling: {} # -- CA certs caCertificates: ""