[CLOUD-576] Drop support for GPU worker nodes in the cloud
Steffen Grohsschmiedt committed Sep 28, 2023
1 parent 8c570f1 commit b862360
Showing 24 changed files with 78 additions and 675 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -5,7 +5,7 @@ NOTES:
BREAKING CHANGES:

ENHANCEMENTS:

* Drop support for GPU workers on Spark
FEATURES:

BUG FIXES:
24 changes: 0 additions & 24 deletions docs/data-sources/cluster.md
@@ -63,32 +63,8 @@ data "hopsworksai_clusters" "cluster" {

Read-Only:

- `gpu_workers` (List of Object) (see [below for nested schema](#nestedobjatt--autoscale--gpu_workers))
- `non_gpu_workers` (List of Object) (see [below for nested schema](#nestedobjatt--autoscale--non_gpu_workers))

<a id="nestedobjatt--autoscale--gpu_workers"></a>
### Nested Schema for `autoscale.gpu_workers`

Read-Only:

- `disk_size` (Number)
- `downscale_wait_time` (Number)
- `instance_type` (String)
- `max_workers` (Number)
- `min_workers` (Number)
- `spot_config` (List of Object) (see [below for nested schema](#nestedobjatt--autoscale--gpu_workers--spot_config))
- `standby_workers` (Number)

<a id="nestedobjatt--autoscale--gpu_workers--spot_config"></a>
### Nested Schema for `autoscale.gpu_workers.spot_config`

Read-Only:

- `fall_back_on_demand` (Boolean)
- `max_price_percent` (Number)



<a id="nestedobjatt--autoscale--non_gpu_workers"></a>
### Nested Schema for `autoscale.non_gpu_workers`

24 changes: 0 additions & 24 deletions docs/data-sources/clusters.md
@@ -87,32 +87,8 @@ Read-Only:

Read-Only:

- `gpu_workers` (List of Object) (see [below for nested schema](#nestedobjatt--clusters--autoscale--gpu_workers))
- `non_gpu_workers` (List of Object) (see [below for nested schema](#nestedobjatt--clusters--autoscale--non_gpu_workers))

<a id="nestedobjatt--clusters--autoscale--gpu_workers"></a>
### Nested Schema for `clusters.autoscale.gpu_workers`

Read-Only:

- `disk_size` (Number)
- `downscale_wait_time` (Number)
- `instance_type` (String)
- `max_workers` (Number)
- `min_workers` (Number)
- `spot_config` (List of Object) (see [below for nested schema](#nestedobjatt--clusters--autoscale--gpu_workers--spot_config))
- `standby_workers` (Number)

<a id="nestedobjatt--clusters--autoscale--gpu_workers--spot_config"></a>
### Nested Schema for `clusters.autoscale.gpu_workers.spot_config`

Read-Only:

- `fall_back_on_demand` (Boolean)
- `max_price_percent` (Number)



<a id="nestedobjatt--clusters--autoscale--non_gpu_workers"></a>
### Nested Schema for `clusters.autoscale.non_gpu_workers`

1 change: 0 additions & 1 deletion docs/data-sources/instance_type.md
@@ -42,7 +42,6 @@ data "hopsworksai_instance_type" "supported_type" {
### Optional

- `min_cpus` (Number) Filter based on the minimum number of CPU cores. Defaults to `0`.
- `min_gpus` (Number) Filter based on the minimum number of GPUs. Defaults to `0`.
- `min_memory_gb` (Number) Filter based on the minimum memory in gigabytes. Defaults to `0`.
- `with_nvme` (Boolean) Filter based on the presence of NVMe drives. Defaults to `false`.

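For illustration, a minimal sketch of the remaining filters (the data source name and values here are made up; `min_gpus` is no longer accepted after this change):

```hcl
data "hopsworksai_instance_type" "nvme_worker" {
  cloud_provider = "AWS"
  node_type      = "worker"

  # Filter on the attributes that remain after this change.
  min_cpus      = 8
  min_memory_gb = 32
  with_nvme     = true
}
```
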
1 change: 0 additions & 1 deletion docs/data-sources/instance_types.md
@@ -41,7 +41,6 @@ data "hopsworksai_instance_types" "supported_worker_types" {
Read-Only:

- `cpus` (Number)
- `gpus` (Number)
- `id` (String)
- `memory` (Number)
- `with_nvme` (Boolean)
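
For illustration, a hedged sketch of listing the remaining attributes (this assumes the types are exposed under a `supported_types` list attribute, which is not shown in this excerpt):

```hcl
data "hopsworksai_instance_types" "workers" {
  cloud_provider = "AWS"
  node_type      = "worker"
}

# Hypothetical output: after this change each entry carries cpus, id,
# memory, and with_nvme, but no gpus attribute.
output "worker_types" {
  value = data.hopsworksai_instance_types.workers.supported_types
}
```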
30 changes: 0 additions & 30 deletions docs/resources/cluster.md
@@ -294,10 +294,6 @@ Required:

- `non_gpu_workers` (Block List, Min: 1, Max: 1) Set up auto scaling for non-GPU nodes. (see [below for nested schema](#nestedblock--autoscale--non_gpu_workers))

Optional:

- `gpu_workers` (Block List, Max: 1) Set up auto scaling for GPU nodes. (see [below for nested schema](#nestedblock--autoscale--gpu_workers))

<a id="nestedblock--autoscale--non_gpu_workers"></a>
### Nested Schema for `autoscale.non_gpu_workers`

@@ -324,32 +320,6 @@ Optional:



<a id="nestedblock--autoscale--gpu_workers"></a>
### Nested Schema for `autoscale.gpu_workers`

Required:

- `instance_type` (String) The instance type to use while auto scaling.

Optional:

- `disk_size` (Number) The disk size to use while auto scaling. Defaults to `512`.
- `downscale_wait_time` (Number) The time to wait before removing unused resources. Defaults to `300`.
- `max_workers` (Number) The maximum number of workers created by auto scaling. Defaults to `10`.
- `min_workers` (Number) The minimum number of workers created by auto scaling. Defaults to `0`.
- `spot_config` (Block List, Max: 1) The configuration to use spot instances. (see [below for nested schema](#nestedblock--autoscale--gpu_workers--spot_config))
- `standby_workers` (Number) The percentage of workers to keep always available during auto scaling. If you set this value to 0, new workers are only added when a job or a notebook requests resources. This attribute is ignored while the minimum number of workers is 0 and no resources are used in the cluster; it takes effect as soon as you start using resources. Defaults to `0.5`.

<a id="nestedblock--autoscale--gpu_workers--spot_config"></a>
### Nested Schema for `autoscale.gpu_workers.spot_config`

Optional:

- `fall_back_on_demand` (Boolean) Fall back to an on-demand instance if unable to allocate a spot instance. Defaults to `true`.
- `max_price_percent` (Number) The maximum spot instance price in percentage of the on-demand price. Defaults to `100`.




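With `gpu_workers` gone, an `autoscale` block now holds only `non_gpu_workers`. A minimal sketch built from the attributes above (values are illustrative, the referenced data source is assumed to exist, and the `spot_config` sub-block is assumed to mirror the one documented above):

```hcl
resource "hopsworksai_cluster" "cluster" {
  # all the other configurations are omitted for clarity

  autoscale {
    non_gpu_workers {
      instance_type       = data.hopsworksai_instance_type.small_worker.id
      disk_size           = 256
      min_workers         = 0
      max_workers         = 10
      standby_workers     = 0.5
      downscale_wait_time = 300

      # Optional: prefer spot instances, falling back to on-demand.
      spot_config {
        max_price_percent   = 100
        fall_back_on_demand = true
      }
    }
  }
}
```
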
<a id="nestedblock--aws_attributes"></a>
### Nested Schema for `aws_attributes`
30 changes: 0 additions & 30 deletions docs/resources/cluster_from_backup.md
@@ -70,10 +70,6 @@ Required:

- `non_gpu_workers` (Block List, Min: 1, Max: 1) Set up auto scaling for non-GPU nodes. (see [below for nested schema](#nestedblock--autoscale--non_gpu_workers))

Optional:

- `gpu_workers` (Block List, Max: 1) Set up auto scaling for GPU nodes. (see [below for nested schema](#nestedblock--autoscale--gpu_workers))

<a id="nestedblock--autoscale--non_gpu_workers"></a>
### Nested Schema for `autoscale.non_gpu_workers`

@@ -100,32 +96,6 @@ Optional:



<a id="nestedblock--autoscale--gpu_workers"></a>
### Nested Schema for `autoscale.gpu_workers`

Required:

- `instance_type` (String) The instance type to use while auto scaling.

Optional:

- `disk_size` (Number) The disk size to use while auto scaling. Defaults to `512`.
- `downscale_wait_time` (Number) The time to wait before removing unused resources. Defaults to `300`.
- `max_workers` (Number) The maximum number of workers created by auto scaling. Defaults to `10`.
- `min_workers` (Number) The minimum number of workers created by auto scaling. Defaults to `0`.
- `spot_config` (Block List, Max: 1) The configuration to use spot instances. (see [below for nested schema](#nestedblock--autoscale--gpu_workers--spot_config))
- `standby_workers` (Number) The percentage of workers to keep always available during auto scaling. If you set this value to 0, new workers are only added when a job or a notebook requests resources. This attribute is ignored while the minimum number of workers is 0 and no resources are used in the cluster; it takes effect as soon as you start using resources. Defaults to `0.5`.

<a id="nestedblock--autoscale--gpu_workers--spot_config"></a>
### Nested Schema for `autoscale.gpu_workers.spot_config`

Optional:

- `fall_back_on_demand` (Boolean) Fall back to an on-demand instance if unable to allocate a spot instance. Defaults to `true`.
- `max_price_percent` (Number) The maximum spot instance price in percentage of the on-demand price. Defaults to `100`.




<a id="nestedblock--aws_attributes"></a>
### Nested Schema for `aws_attributes`
15 changes: 3 additions & 12 deletions examples/complete/aws/autoscale/README.md
@@ -44,15 +44,15 @@ terraform apply

## Update Autoscale

You can update the autoscale configuration after creation by changing the `autoscale` configuration block. For example, you can configure autoscale for GPU workers as follows:
You can update the autoscale configuration after creation by changing the `autoscale` configuration block. For example, you can configure autoscale as follows:

> **Notice** that you need to run `terraform apply` after updating your configuration for your changes to take effect.
```hcl
data "hopsworksai_instance_type" "gpu_worker" {
data "hopsworksai_instance_type" "small_worker" {
cloud_provider = "AWS"
node_type = "worker"
min_gpus = 1
min_cpus = 8
}
resource "hopsworksai_cluster" "cluster" {
@@ -67,15 +67,6 @@ resource "hopsworksai_cluster" "cluster" {
standby_workers = 0.5
downscale_wait_time = 300
}
gpu_workers {
instance_type = data.hopsworksai_instance_type.gpu_worker.id
disk_size = 256
min_workers = 0
max_workers = 5
standby_workers = 0.5
downscale_wait_time = 300
}
}
}
8 changes: 4 additions & 4 deletions examples/complete/aws/basic/README.md
@@ -82,13 +82,13 @@ resource "hopsworksai_cluster" "cluster" {
}
```

You can add a different worker type, for example another worker with at least one GPU, as follows:
You can add a different worker type, for example another worker with at least 16 CPU cores, as follows:

```hcl
data "hopsworksai_instance_type" "gpu_worker" {
data "hopsworksai_instance_type" "my_worker" {
cloud_provider = "AWS"
node_type = "worker"
min_gpus = 1
min_cpus = 16
}
resource "hopsworksai_cluster" "cluster" {
@@ -101,7 +101,7 @@ resource "hopsworksai_cluster" "cluster" {
}
workers {
instance_type = data.hopsworksai_instance_type.gpu_worker.id
instance_type = data.hopsworksai_instance_type.my_worker.id
disk_size = 512
count = 1
}
17 changes: 4 additions & 13 deletions examples/complete/azure/autoscale/README.md
@@ -44,38 +44,29 @@ terraform apply -var="resource_group=<YOUR_RESOURCE_GROUP>"

## Update Autoscale

You can update the autoscale configuration after creation by changing the `autoscale` configuration block. For example, you can configure autoscale for GPU workers as follows:
You can update the autoscale configuration after creation by changing the `autoscale` configuration block. For example, you can configure your own worker as follows:

> **Notice** that you need to run `terraform apply` after updating your configuration for your changes to take effect.
```hcl
data "hopsworksai_instance_type" "gpu_worker" {
data "hopsworksai_instance_type" "my_worker" {
cloud_provider = "AZURE"
node_type = "worker"
min_gpus = 1
min_cpus = 16
}
resource "hopsworksai_cluster" "cluster" {
# all the other configurations are omitted for clarity
autoscale {
non_gpu_workers {
instance_type = data.hopsworksai_instance_type.small_worker.id
instance_type = data.hopsworksai_instance_type.my_worker.id
disk_size = 256
min_workers = 0
max_workers = 10
standby_workers = 0.5
downscale_wait_time = 300
}
gpu_workers {
instance_type = data.hopsworksai_instance_type.gpu_worker.id
disk_size = 256
min_workers = 0
max_workers = 5
standby_workers = 0.5
downscale_wait_time = 300
}
}
}
8 changes: 4 additions & 4 deletions examples/complete/azure/basic/README.md
@@ -82,13 +82,13 @@ resource "hopsworksai_cluster" "cluster" {
}
```

You can add a different worker type, for example another worker with at least one GPU, as follows:
You can add a different worker type, for example another worker with at least 16 CPU cores, as follows:

```hcl
data "hopsworksai_instance_type" "gpu_worker" {
data "hopsworksai_instance_type" "my_worker" {
cloud_provider = "AZURE"
node_type = "worker"
min_gpus = 1
min_cpus = 16
}
resource "hopsworksai_cluster" "cluster" {
@@ -101,7 +101,7 @@ resource "hopsworksai_cluster" "cluster" {
}
workers {
instance_type = data.hopsworksai_instance_type.gpu_worker.id
instance_type = data.hopsworksai_instance_type.my_worker.id
disk_size = 512
count = 1
}
11 changes: 0 additions & 11 deletions hopsworksai/data_source_instance_type.go
@@ -46,13 +46,6 @@ func dataSourceInstanceType() *schema.Resource {
Default: 0,
ValidateFunc: validation.IntAtLeast(0),
},
"min_gpus": {
Description: "Filter based on the minimum number of GPUs.",
Type: schema.TypeInt,
Optional: true,
Default: 0,
ValidateFunc: validation.IntAtLeast(0),
},
"with_nvme": {
Description: "Filter based on the presence of NVMe drives.",
Type: schema.TypeBool,
@@ -85,7 +78,6 @@ func dataSourceInstanceTypeRead(ctx context.Context, d *schema.ResourceData, met

minMemory := d.Get("min_memory_gb").(float64)
minCPUs := d.Get("min_cpus").(int)
minGPUs := d.Get("min_gpus").(int)
withNVMe := d.Get("with_nvme").(bool)

var chosenType *api.SupportedInstanceType = nil
@@ -96,9 +88,6 @@ func dataSourceInstanceTypeRead(ctx context.Context, d *schema.ResourceData, met
if minCPUs > 0 && v.CPUs < minCPUs {
continue
}
if minGPUs > 0 && v.GPUs < minGPUs {
continue
}
if withNVMe != v.WithNVMe {
continue
}
