Skip to content

Commit

Permalink
Merge pull request #98 from iandyh/split
Browse files Browse the repository at this point in the history
Distributed mode phase 1
  • Loading branch information
iandyh committed Jun 24, 2024
2 parents 81ccbf4 + 354fdb2 commit a3e0cd3
Show file tree
Hide file tree
Showing 21 changed files with 616 additions and 117 deletions.
74 changes: 74 additions & 0 deletions .github/workflows/build-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Builds the Shibuya api and controller images, pushes them to Google
# Artifact Registry, and publishes the Helm charts on pushes to the
# "split" and "master" branches.
#
# Required configuration:
#   - secrets.GCP_CREDENTIALS: service-account JSON consumed by
#     google-github-actions/auth to mint a registry access token.
#   - GAR_LOCATION below must match the region of the Artifact Registry.
#
# NOTE(review): shibuya/Makefile builds the image path from
# $(GAR_LOCATION)-docker.pkg.dev/$(GCP_PROJECT), but GCP_PROJECT is not
# exported in `env` here — confirm it is provided elsewhere (e.g. the
# "production" environment) or add it below.

name: Build and Deploy to GCP registry

on:
  push:
    branches: [ "split", "master" ]

env:
  GAR_LOCATION: asia-northeast1 # TODO: update region of the Artifact Registry
  IMAGE: shibuya

jobs:
  setup-build-publish-deploy:
    name: Setup, Build, Publish
    runs-on: ubuntu-20.04
    environment: production

    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Go
        uses: actions/setup-go@v4
        with:
          go-version: '1.17'

      # Exchange the service-account credentials for an OAuth access token.
      - id: 'auth'
        uses: 'google-github-actions/auth@v1'
        with:
          credentials_json: '${{ secrets.GCP_CREDENTIALS }}'
          token_format: 'access_token'

      # Log docker in to the Artifact Registry with the token from above.
      - name: Docker configuration
        run: |-
          echo '${{ steps.auth.outputs.access_token }}' | docker login -u oauth2accesstoken --password-stdin https://$GAR_LOCATION-docker.pkg.dev

      # Build and push the component images via shibuya/Makefile.
      - name: Build api
        run: |-
          cd shibuya && make api_image component=api

      - name: Build controller
        run: |-
          cd shibuya && make controller_image component=controller

      # chart-releaser commits to the repo, so git identity must be set.
      - name: Configure Git
        run: |
          git config user.name "$GITHUB_ACTOR"
          git config user.email "$GITHUB_ACTOR@users.noreply.github.com"

      - name: Install Helm
        uses: azure/setup-helm@v3
        with:
          version: "v3.13.3"

      - name: Run chart-releaser
        uses: helm/chart-releaser-action@v1.5.0
        with:
          charts_dir: shibuya/install
        env:
          CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
16 changes: 14 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ Collection is the unit where the actual tests are managed. Therefore, multiple t
Pre-requisites:
1. Kind (https://kind.sigs.k8s.io)
2. kubectl (https://kubernetes.io/docs/tasks/tools/install-kubectl)
3. Docker (https://docs.docker.com/install) *On OSX please increase your docker machine's spec or you may face performance issues*
3. Helm (https://helm.sh/docs/intro/install/)
4. Docker (https://docs.docker.com/install) *On OSX please increase your docker machine's spec or you may face performance issues*


Run `make` to start local cluster
Expand All @@ -30,6 +31,17 @@ Then you can go to http://localhost:8080 to check.

note: Local Shibuya does not have authentication. So you need to put `shibuya` as the ownership of the project. This is the same if you turn off authentication in the config file.

## Distributed mode (WIP)

In order to improve the scalability of Shibuya, we are going to split the single Shibuya process into three components:

- apiserver
- controller
- Engine metric streamer (does not exist yet)

By default, Shibuya runs locally in non-distributed mode. You can enable distributed mode by setting `runtime.distributed_mode` to `true`.


### Production setup

Please read the makefile to understand what components are needed and how to set them up in detail.
Expand Down Expand Up @@ -66,4 +78,4 @@ Please read the makefile to understand what components are needed and how to set

- Adding more executor type support. For example, Gatling. Technically speaking, Shibuya can support any executor as long as the executor can provide real time metrics data in some way.
- Manage multiple contexts in one controller.
- Better Authentication
- Better Authentication
36 changes: 0 additions & 36 deletions kubernetes/shibuya.yaml

This file was deleted.

18 changes: 13 additions & 5 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ grafana: grafana/

.PHONY: shibuya
shibuya: shibuya/ kubernetes/
cp shibuya/config_tmpl.json shibuya/config.json
cd shibuya && sh build.sh
docker build -f shibuya/docker-local/Dockerfile --build-arg env=local -t shibuya:local shibuya
kind load docker-image shibuya:local --name shibuya
kubectl -n $(shibuya-controller-ns) replace -f kubernetes/shibuya.yaml --force
docker build -f shibuya/Dockerfile --build-arg env=local -t api:local shibuya
kind load docker-image api:local --name shibuya
helm uninstall shibuya || true
helm upgrade --install shibuya install/shibuya

.PHONY: jmeter
jmeter: shibuya/engines/jmeter
Expand Down Expand Up @@ -84,4 +84,12 @@ ingress-controller:
# if you need to debug the controller, please use the makefile in the ingress controller folder
# And update the image in the config.json
docker build -t shibuya:ingress-controller -f ingress-controller/Dockerfile ingress-controller
kind load docker-image shibuya:ingress-controller --name shibuya
kind load docker-image shibuya:ingress-controller --name shibuya

.PHONY: controller
controller:
cd shibuya && sh build.sh controller
docker build -f shibuya/Dockerfile --build-arg env=local --build-arg="binary_name=shibuya-controller" -t controller:local shibuya
kind load docker-image controller:local --name shibuya
helm uninstall shibuya || true
helm upgrade --install shibuya install/shibuya
2 changes: 2 additions & 0 deletions shibuya/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.tgz
shibuya-install/*
35 changes: 6 additions & 29 deletions shibuya/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,41 +1,18 @@
FROM gcr.io/shibuya-214807/golang:1.17-stretch AS builder
FROM ubuntu:18.04

RUN apt-get update && apt-get install -y curl
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl \
&& chmod +x ./kubectl \
&& mv ./kubectl /usr/local/bin/kubectl

WORKDIR /go/src/shibuya

ENV GO111MODULE on
ADD go.mod .
ADD go.sum .
RUN go mod download

COPY . /go/src/shibuya

RUN GOOS=linux GOARCH=amd64 go build -ldflags="-w -s" -o /go/bin/shibuya

# Use only binaries from above image for running the app
FROM gcr.io/shibuya-214807/ubuntu:18.04

COPY --from=builder /go/bin/shibuya /usr/local/bin/shibuya
COPY --from=builder /usr/local/bin/kubectl /usr/local/bin/kubectl
COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt

RUN mkdir /auth
ADD ./shibuya-gcp.json /auth/shibuya-gcp.json
ARG binary_name=shibuya
ADD ./build/${binary_name} /usr/local/bin/${binary_name}

ENV GOOGLE_APPLICATION_CREDENTIALS /auth/shibuya-gcp.json

ARG env=local
ENV env ${env}
ARG lab_image=""
ENV lab_image ${lab_image}
ARG proxy=""
ENV http_proxy ${proxy}
ENV https_proxy ${proxy}

COPY config/kube_configs /root/.kube
COPY config.json /config.json
COPY ./ui/ /
CMD ["shibuya"]
ENV binary=${binary_name}
CMD ${binary}
27 changes: 27 additions & 0 deletions shibuya/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Image coordinates in Google Artifact Registry.
# component is passed on the make command line (e.g. component=api);
# GAR_LOCATION, GCP_PROJECT and GITHUB_SHA come from the CI environment.
registry=$(GAR_LOCATION)-docker.pkg.dev/$(GCP_PROJECT)
repository=shibuya
tag=$(GITHUB_SHA)
img=$(registry)/$(repository)/$(component):$(tag)

# Compile the api server binary.
.PHONY: api_build
api_build:
	sh build.sh

# Build and push the api server image.
.PHONY: api_image
api_image: api_build
	docker build -t $(img) -f Dockerfile .
	docker push $(img)

# Compile the standalone controller binary.
.PHONY: controller_build
controller_build:
	sh build.sh controller

# Build and push the controller image.
.PHONY: controller_image
controller_image: controller_build
	docker build -t $(img) -f Dockerfile --build-arg="binary_name=shibuya-controller" .
	docker push $(img)

# Scaffold and package the Helm charts.
.PHONY: helm_charts
helm_charts:
	helm create shibuya-install
	helm package shibuya-install/
4 changes: 3 additions & 1 deletion shibuya/api/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@ type ShibuyaAPI struct {
}

func NewAPIServer() *ShibuyaAPI {
return &ShibuyaAPI{
c := &ShibuyaAPI{
ctr: controller.NewController(),
}
c.ctr.StartRunning()
return c
}

type JSONMessage struct {
Expand Down
2 changes: 2 additions & 0 deletions shibuya/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ go mod download
# Dispatch on the requested build target; every branch cross-compiles a
# static-ish linux/amd64 binary with stripped symbols into ./build.
case "$target" in
    "jmeter")
        # JMeter engine agent.
        GOOS=linux GOARCH=amd64 go build -ldflags="-w -s" -o build/shibuya-agent $(pwd)/engines/jmeter
        ;;
    "controller")
        # Standalone controller process for distributed mode.
        GOOS=linux GOARCH=amd64 go build -ldflags="-w -s" -o build/shibuya-controller $(pwd)/controller/cmd
        ;;
    *)
        # Default target: the api server binary.
        GOOS=linux GOARCH=amd64 go build -ldflags="-w -s" -o build/shibuya
        ;;
esac
1 change: 1 addition & 0 deletions shibuya/config/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ var defaultIngressConfig = IngressConfig{
type ShibuyaConfig struct {
ProjectHome string `json:"project_home"`
UploadFileHelp string `json:"upload_file_help"`
DistributedMode bool `json:"distributed_mode"`
DBConf *MySQLConfig `json:"db"`
ExecutorConfig *ExecutorConfig `json:"executors"`
DashboardConfig *DashboardConfig `json:"dashboard"`
Expand Down
14 changes: 14 additions & 0 deletions shibuya/controller/cmd/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package main

import (
"github.com/rakutentech/shibuya/shibuya/controller"
log "github.com/sirupsen/logrus"
)

// This func keep tracks of all the running engines. They should just rely on the data in the db
// and make necessary queries to the scheduler.
func main() {
log.Info("Controller is running in distributed mode")
controller := controller.NewController()
controller.IsolateBackgroundTasks()
}
9 changes: 6 additions & 3 deletions shibuya/controller/garbage.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
log "github.com/sirupsen/logrus"
)

func (c *Controller) checkRunningThenTerminate() {
func (c *Controller) CheckRunningThenTerminate() {
jobs := make(chan *RunningPlan)
for w := 1; w <= 3; w++ {
go func(jobs <-chan *RunningPlan) {
Expand Down Expand Up @@ -40,6 +40,7 @@ func (c *Controller) checkRunningThenTerminate() {
for {
runningPlans, err := model.GetRunningPlans()
if err != nil {
log.Error(err)
continue
}
localCache := make(map[int64]*model.Collection)
Expand Down Expand Up @@ -107,7 +108,8 @@ func isCollectionStale(rh *model.RunHistory, launchTime time.Time) (bool, error)
return true, nil
}

func (c *Controller) autoPurgeDeployments() {
func (c *Controller) AutoPurgeDeployments() {
log.Info("Start the loop for purging idle engines")
for {
deployedCollections, err := c.Scheduler.GetDeployedCollections()
if err != nil {
Expand Down Expand Up @@ -151,7 +153,8 @@ func (c *Controller) autoPurgeDeployments() {
// Last time used is defined as:
// 1. If none of the collections has a run, it will be the last launch time of the engines of a collection
// 2. If any of the collection has a run, it will be the end time of that run
func (c *Controller) autoPurgeProjectIngressController() {
func (c *Controller) AutoPurgeProjectIngressController() {
log.Info("Start the loop for purging idle ingress controllers")
projectLastUsedTime := make(map[int64]time.Time)
ingressLifespan, err := time.ParseDuration(config.SC.IngressConfig.Lifespan)
if err != nil {
Expand Down
36 changes: 25 additions & 11 deletions shibuya/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,6 @@ func NewController() *Controller {
}
c.schedulerKind = config.SC.ExecutorConfig.Cluster.Kind
c.Scheduler = scheduler.NewEngineScheduler(config.SC.ExecutorConfig.Cluster)

// First we do is to resume the running plans
// This method should not be moved as later goroutines rely on it.
c.resumeRunningPlans()
go c.streamToApi()
go c.readConnectedEngines()
go c.checkRunningThenTerminate()
go c.fetchEngineMetrics()
go c.cleanLocalStore()
go c.autoPurgeDeployments()
go c.autoPurgeProjectIngressController()
return c
}

Expand All @@ -70,6 +59,31 @@ type ApiMetricStreamEvent struct {
PlanID string `json:"plan_id"`
}

// StartRunning boots the controller's background goroutines.
//
// resumeRunningPlans must run first and must not be moved: the goroutines
// launched below depend on the running plans having been restored.
func (c *Controller) StartRunning() {
	c.resumeRunningPlans()
	go c.streamToApi()
	go c.readConnectedEngines()
	go c.fetchEngineMetrics()
	go c.cleanLocalStore()
	// Termination checking stays in this process for now, even in
	// distributed mode: terminating engines must also close their open
	// connections here, otherwise we could leak connections. It can only
	// move to an isolated controller process later.
	go c.CheckRunningThenTerminate()
	if config.SC.DistributedMode {
		return
	}
	// Non-distributed mode: run the tasks that would otherwise live in a
	// separate controller process as an in-process goroutine.
	log.Info("Controller is running in non-distributed mode!")
	go c.IsolateBackgroundTasks()
}

// IsolateBackgroundTasks runs the purge loops for idle engine deployments
// and idle project ingress controllers. In distributed mode it is the body
// of a standalone controller process; in non-distributed mode StartRunning
// launches it as a goroutine. The ingress purge loop runs on the calling
// goroutine, so this call blocks.
func (c *Controller) IsolateBackgroundTasks() {
	go c.AutoPurgeDeployments()
	c.AutoPurgeProjectIngressController()
}

func (c *Controller) streamToApi() {
for {
select {
Expand Down
Loading

0 comments on commit a3e0cd3

Please sign in to comment.