From ce3b1a90670d3acede513ad7d8ce11958bc3a1fd Mon Sep 17 00:00:00 2001 From: Mahmoud Ismail Date: Tue, 1 Aug 2023 17:55:02 +0300 Subject: [PATCH] [CLOUD-607] Add an advanced example for ArrowFlight setup --- CHANGELOG.md | 1 + examples/complete/aws/advanced/README.md | 5 + .../README.md | 23 ++ .../main.tf | 335 ++++++++++++++++++ .../variables.tf | 19 + .../versions.tf | 13 + 6 files changed, 396 insertions(+) create mode 100644 examples/complete/aws/advanced/README.md create mode 100644 examples/complete/aws/advanced/arrowflight-no-loadbalancer-permissions/README.md create mode 100644 examples/complete/aws/advanced/arrowflight-no-loadbalancer-permissions/main.tf create mode 100644 examples/complete/aws/advanced/arrowflight-no-loadbalancer-permissions/variables.tf create mode 100644 examples/complete/aws/advanced/arrowflight-no-loadbalancer-permissions/versions.tf diff --git a/CHANGELOG.md b/CHANGELOG.md index b61d1c0..85e6077 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ ENHANCEMENTS: * test-fixtures: run against dev and prod envs * resource/hopsworksai_cluster: Set Default `version` to 3.3.0 * Bump minimum Go version to 1.19 +* examples: add an advanced AWS example to set up a Hopsworks cluster with an ArrowFlight server FEATURES: diff --git a/examples/complete/aws/advanced/README.md b/examples/complete/aws/advanced/README.md new file mode 100644 index 0000000..8f59647 --- /dev/null +++ b/examples/complete/aws/advanced/README.md @@ -0,0 +1,5 @@ +# Hopsworks.ai AWS Advanced Examples + +In this directory, we have the following examples: + +1. 
[Cluster with ArrowFlight+DuckDB and no load balancer permissions](./arrowflight-no-loadbalancer-permissions) diff --git a/examples/complete/aws/advanced/arrowflight-no-loadbalancer-permissions/README.md b/examples/complete/aws/advanced/arrowflight-no-loadbalancer-permissions/README.md new file mode 100644 index 0000000..61b9404 --- /dev/null +++ b/examples/complete/aws/advanced/arrowflight-no-loadbalancer-permissions/README.md @@ -0,0 +1,23 @@ +# Hopsworks cluster with ArrowFlight server with no load balancer permissions + +In this example, we create a Hopsworks cluster with the ArrowFlight server enabled. This example assumes that the users have removed the permissions for Hopsworks.ai to manage load balancers on their behalf. If you have given Hopsworks.ai the load balancer management permissions as shown in [the docs](https://docs.hopsworks.ai/latest/setup_installation/aws/restrictive_permissions/#load-balancers-permissions-for-external-access), then there is no need to run this example; instead, you can simply set the `rondb/mysql_nodes/arrow_flight_with_duckdb` attribute to true in your Terraform cluster configuration. + + +## How to run the example +First, ensure that your AWS credentials are set up correctly by running the following command: + +```bash +aws configure +``` + +Then, run the following commands. Replace the placeholder with your Hopsworks API Key. The cluster will be created in the us-east-2 region by default; however, you can configure which region to use by setting the `region` variable when applying the changes: `-var="region=YOUR_REGION"` + +```bash +export HOPSWORKSAI_API_KEY=<YOUR_HOPSWORKSAI_API_KEY> +terraform init +terraform apply +``` + +## Terminate the cluster + +You can run `terraform destroy` to delete the cluster and all the other required cloud resources created in this example. 
\ No newline at end of file diff --git a/examples/complete/aws/advanced/arrowflight-no-loadbalancer-permissions/main.tf b/examples/complete/aws/advanced/arrowflight-no-loadbalancer-permissions/main.tf new file mode 100644 index 0000000..70187ab --- /dev/null +++ b/examples/complete/aws/advanced/arrowflight-no-loadbalancer-permissions/main.tf @@ -0,0 +1,335 @@ +provider "aws" { + region = var.region + profile = var.profile +} + +provider "hopsworksai" { + +} + + +# Step 1: Create required aws resources, an ssh key, an s3 bucket, and an instance profile with the required hopsworks permissions +module "aws" { + source = "logicalclocks/helpers/hopsworksai//modules/aws" + region = var.region + version = "2.3.0" +} + + +# Step 2: Create a VPC +data "aws_availability_zones" "available" { +} + +module "vpc" { + source = "terraform-aws-modules/vpc/aws" + version = "3.1.0" + + name = "${var.cluster_name}-vpc" + cidr = "172.16.0.0/16" + azs = data.aws_availability_zones.available.names + public_subnets = ["172.16.4.0/24"] + enable_dns_hostnames = true +} + +# Step 3: Create a security group and open required ports +resource "aws_security_group" "security_group" { + name = "${var.cluster_name}-security-group" + description = "Allow access for Hopsworks cluster" + vpc_id = module.vpc.vpc_id + + ingress { + description = "HTTPS" + from_port = 443 + to_port = 443 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + description = "HTTP" + from_port = 80 + to_port = 80 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + description = "MYSQL" + from_port = 3306 + to_port = 3306 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + description = "ArrowFlight" + from_port = 5005 + to_port = 5005 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + description = "HiveServer" + from_port = 9085 + to_port = 9085 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + description = "HiveMetastore" + from_port = 
9083 + to_port = 9083 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + description = "Kafka" + from_port = 9092 + to_port = 9092 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + from_port = 0 + to_port = 0 + protocol = -1 + self = true + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + ipv6_cidr_blocks = ["::/0"] + } +} + + +# Step 3: Create a network load balancer +resource "aws_lb" "lb" { + name = "${var.cluster_name}-lb" + internal = false + load_balancer_type = "network" + subnets = module.vpc.public_subnets +} + +# Step 4: create a cluster with 1 worker +data "hopsworksai_instance_type" "head" { + cloud_provider = "AWS" + node_type = "head" + region = var.region +} + +data "hopsworksai_instance_type" "rondb_mgm" { + cloud_provider = "AWS" + node_type = "rondb_management" + region = var.region +} + +data "hopsworksai_instance_type" "rondb_data" { + cloud_provider = "AWS" + node_type = "rondb_data" + region = var.region +} + +data "hopsworksai_instance_type" "rondb_mysql" { + cloud_provider = "AWS" + node_type = "rondb_mysql" + region = var.region + min_cpus = 8 + min_memory_gb = 16 +} + +data "hopsworksai_instance_type" "smallest_worker" { + cloud_provider = "AWS" + node_type = "worker" + region = var.region + min_cpus = 8 +} + +resource "hopsworksai_cluster" "cluster" { + name = var.cluster_name + ssh_key = module.aws.ssh_key_pair_name + + head { + instance_type = data.hopsworksai_instance_type.head.id + } + + workers { + instance_type = data.hopsworksai_instance_type.smallest_worker.id + count = 1 + } + + aws_attributes { + region = var.region + bucket { + name = module.aws.bucket_name + } + instance_profile_arn = module.aws.instance_profile_arn + network { + vpc_id = module.vpc.vpc_id + subnet_id = module.vpc.public_subnets[0] + security_group_id = aws_security_group.security_group.id + } + } + + rondb { + configuration { + ndbd_default { + replication_factor = 2 + } + } + 
+ management_nodes { + instance_type = data.hopsworksai_instance_type.rondb_mgm.id + disk_size = 30 + } + data_nodes { + instance_type = data.hopsworksai_instance_type.rondb_data.id + count = 2 + disk_size = 512 + } + mysql_nodes { + instance_type = data.hopsworksai_instance_type.rondb_mysql.id + count = var.num_mysql_servers + disk_size = 256 + arrow_flight_with_duckdb = true + } + } + + init_script = <