From 0236b71bd0b9dac6149b010a7ff88b00b6771a6e Mon Sep 17 00:00:00 2001 From: Helber Belmiro Date: Thu, 26 Oct 2023 15:08:48 -0300 Subject: [PATCH] Parameterized HealthCheck Timeouts Signed-off-by: Helber Belmiro --- config/base/kustomization.yaml | 14 +++++++++++++ config/base/params.env | 2 ++ config/configmaps/files/config.yaml | 6 ++++++ controllers/config/defaults.go | 31 +++++++++++++++++++---------- controllers/database.go | 12 ++++++++--- controllers/storage.go | 13 +++++++++--- controllers/storage_test.go | 11 +++++----- controllers/suite_test.go | 4 ++-- 8 files changed, 69 insertions(+), 24 deletions(-) diff --git a/config/base/kustomization.yaml b/config/base/kustomization.yaml index 80749b52a..e38566b3c 100644 --- a/config/base/kustomization.yaml +++ b/config/base/kustomization.yaml @@ -113,5 +113,19 @@ vars: apiVersion: v1 fieldref: fieldpath: data.MAX_CONCURRENT_RECONCILES + - name: DSPO_HEALTHCHECK_DATABASE_CONNECTIONTIMEOUT + objref: + kind: ConfigMap + name: dspo-parameters + apiVersion: v1 + fieldref: + fieldpath: data.DSPO_HEALTHCHECK_DATABASE_CONNECTIONTIMEOUT + - name: DSPO_HEALTHCHECK_OBJECTSTORE_CONNECTIONTIMEOUT + objref: + kind: ConfigMap + name: dspo-parameters + apiVersion: v1 + fieldref: + fieldpath: data.DSPO_HEALTHCHECK_OBJECTSTORE_CONNECTIONTIMEOUT configurations: - params.yaml diff --git a/config/base/params.env b/config/base/params.env index 3ab9edd3f..f18f38937 100644 --- a/config/base/params.env +++ b/config/base/params.env @@ -12,3 +12,5 @@ IMAGES_MARIADB=registry.redhat.io/rhel8/mariadb-103:1 IMAGES_OAUTHPROXY=registry.redhat.io/openshift4/ose-oauth-proxy@sha256:ab112105ac37352a2a4916a39d6736f5db6ab4c29bad4467de8d613e80e9bb33 ZAP_LOG_LEVEL=info MAX_CONCURRENT_RECONCILES=10 +DSPO_HEALTHCHECK_DATABASE_CONNECTIONTIMEOUT=15s +DSPO_HEALTHCHECK_OBJECTSTORE_CONNECTIONTIMEOUT=15s diff --git a/config/configmaps/files/config.yaml b/config/configmaps/files/config.yaml index 477512b54..bc6c37dd4 100644 --- a/config/configmaps/files/config.yaml +++ b/config/configmaps/files/config.yaml @@ -10,3 +10,9 @@ Images: MlmdEnvoy: $(IMAGES_MLMDENVOY) MlmdGRPC: $(IMAGES_MLMDGRPC) MlmdWriter: $(IMAGES_MLMDWRITER) +DSPO: + HealthCheck: + Database: + ConnectionTimeout: $(DSPO_HEALTHCHECK_DATABASE_CONNECTIONTIMEOUT) + ObjectStore: + ConnectionTimeout: $(DSPO_HEALTHCHECK_OBJECTSTORE_CONNECTIONTIMEOUT) diff --git a/controllers/config/defaults.go b/controllers/config/defaults.go index 2b0c38f65..58cc63903 100644 --- a/controllers/config/defaults.go +++ b/controllers/config/defaults.go @@ -59,17 +59,19 @@ const ( // DSPO Config File Paths const ( - APIServerImagePath = "Images.ApiServer" - APIServerArtifactImagePath = "Images.Artifact" - PersistenceAgentImagePath = "Images.PersistentAgent" - ScheduledWorkflowImagePath = "Images.ScheduledWorkflow" - APIServerCacheImagePath = "Images.Cache" - APIServerMoveResultsImagePath = "Images.MoveResultsImage" - MariaDBImagePath = "Images.MariaDB" - OAuthProxyImagePath = "Images.OAuthProxy" - MlmdEnvoyImagePath = "Images.MlmdEnvoy" - MlmdGRPCImagePath = "Images.MlmdGRPC" - MlmdWriterImagePath = "Images.MlmdWriter" + APIServerImagePath = "Images.ApiServer" + APIServerArtifactImagePath = "Images.Artifact" + PersistenceAgentImagePath = "Images.PersistentAgent" + ScheduledWorkflowImagePath = "Images.ScheduledWorkflow" + APIServerCacheImagePath = "Images.Cache" + APIServerMoveResultsImagePath = "Images.MoveResultsImage" + MariaDBImagePath = "Images.MariaDB" + OAuthProxyImagePath = "Images.OAuthProxy" + MlmdEnvoyImagePath = "Images.MlmdEnvoy" + MlmdGRPCImagePath = "Images.MlmdGRPC" + MlmdWriterImagePath = "Images.MlmdWriter" + ObjStoreConnectionTimeoutConfigName = "DSPO.HealthCheck.ObjectStore.ConnectionTimeout" + DBConnectionTimeoutConfigName = "DSPO.HealthCheck.Database.ConnectionTimeout" ) // DSPA Status Condition Types @@ -152,3 +154,10 @@ func GetStringConfigWithDefault(configName, value string) string { } return viper.GetString(configName) } + +func GetDurationConfigWithDefault(configName string, value time.Duration) time.Duration { + if !viper.IsSet(configName) { + return value + } + return viper.GetDuration(configName) +} diff --git a/controllers/database.go b/controllers/database.go index 5ba313f49..b0e6d891f 100644 --- a/controllers/database.go +++ b/controllers/database.go @@ -24,6 +24,7 @@ import ( _ "github.com/go-sql-driver/mysql" dspav1alpha1 "github.com/opendatahub-io/data-science-pipelines-operator/api/v1alpha1" "github.com/opendatahub-io/data-science-pipelines-operator/controllers/config" + "time" ) const dbSecret = "mariadb/secret.yaml.tmpl" @@ -36,9 +37,9 @@ var mariadbTemplates = []string{ } // extract to var for mocking in testing -var ConnectAndQueryDatabase = func(host, port, username, password, dbname string) bool { +var ConnectAndQueryDatabase = func(host, port, username, password, dbname string, dbConnectionTimeout time.Duration) bool { // Create a context with a timeout of 1 second - ctx, cancel := context.WithTimeout(context.Background(), config.DefaultDBConnectionTimeout) + ctx, cancel := context.WithTimeout(context.Background(), dbConnectionTimeout) defer cancel() connectionString := fmt.Sprintf("%s:%s@tcp(%s:%s)/%s", username, password, host, port, dbname) @@ -72,11 +73,16 @@ func (r *DSPAReconciler) isDatabaseAccessible(ctx context.Context, dsp *dspav1al } decodePass, _ := b64.StdEncoding.DecodeString(params.DBConnection.Password) + dbConnectionTimeout := config.GetDurationConfigWithDefault(config.DBConnectionTimeoutConfigName, config.DefaultDBConnectionTimeout) + + log.V(1).Info(fmt.Sprintf("Database Heath Check connection timeout: %s", dbConnectionTimeout)) + dbHealthCheckPassed := ConnectAndQueryDatabase(params.DBConnection.Host, params.DBConnection.Port, params.DBConnection.Username, string(decodePass), - params.DBConnection.DBName) + params.DBConnection.DBName, + dbConnectionTimeout) if dbHealthCheckPassed { log.Info("Database Health Check Successful") } else { diff --git a/controllers/storage.go b/controllers/storage.go index 57b95a558..90606da30 100644 --- a/controllers/storage.go +++ b/controllers/storage.go @@ -30,6 +30,7 @@ import ( dspav1alpha1 "github.com/opendatahub-io/data-science-pipelines-operator/api/v1alpha1" "github.com/opendatahub-io/data-science-pipelines-operator/controllers/config" "github.com/opendatahub-io/data-science-pipelines-operator/controllers/util" + "time" ) const storageSecret = "minio/secret.yaml.tmpl" @@ -93,7 +94,7 @@ func getHttpsTransportWithCACert(log logr.Logger, pemCerts []byte) (*http.Transp return transport, nil } -var ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte) bool { +var ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte, objStoreConnectionTimeout time.Duration) bool { cred := createCredentialProvidersChain(string(accesskey), string(secretkey)) opts := &minio.Options{ @@ -116,7 +117,7 @@ var ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoin return false } - ctx, cancel := context.WithTimeout(ctx, config.DefaultObjStoreConnectionTimeout) + ctx, cancel := context.WithTimeout(ctx, objStoreConnectionTimeout) defer cancel() // Attempt to run Stat on the Object. It doesn't necessarily have to exist, we just want to verify we can successfully run an authenticated s3 command @@ -176,7 +177,13 @@ func (r *DSPAReconciler) isObjectStorageAccessible(ctx context.Context, dsp *dsp return false } - verified := ConnectAndQueryObjStore(ctx, log, endpoint, params.ObjectStorageConnection.Bucket, accesskey, secretkey, *params.ObjectStorageConnection.Secure, params.APICustomPemCerts) + objStoreConnectionTimeout := config.GetDurationConfigWithDefault(config.ObjStoreConnectionTimeoutConfigName, config.DefaultObjStoreConnectionTimeout) + + log.V(1).Info(fmt.Sprintf("Object Store connection timeout: %s", objStoreConnectionTimeout)) + + verified := ConnectAndQueryObjStore(ctx, log, endpoint, params.ObjectStorageConnection.Bucket, accesskey, secretkey, + *params.ObjectStorageConnection.Secure, params.APICustomPemCerts, objStoreConnectionTimeout) + if verified { log.Info("Object Storage Health Check Successful") } else { diff --git a/controllers/storage_test.go b/controllers/storage_test.go index d5f16dbb3..36d863b97 100644 --- a/controllers/storage_test.go +++ b/controllers/storage_test.go @@ -21,6 +21,7 @@ import ( "context" "encoding/base64" "testing" + "time" "github.com/go-logr/logr" "github.com/minio/minio-go/v7/pkg/credentials" @@ -269,7 +270,7 @@ func TestDefaultDeployBehaviorStorage(t *testing.T) { func TestIsDatabaseAccessibleTrue(t *testing.T) { // Override the live connection function with a mock version - ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte) bool { + ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte, objStoreConnectionTimeout time.Duration) bool { return true } @@ -307,7 +308,7 @@ func TestIsDatabaseAccessibleTrue(t *testing.T) { func TestIsDatabaseNotAccessibleFalse(t *testing.T) { // Override the live connection function with a mock version - ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte) bool { + ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte, objStoreConnectionTimeout time.Duration) bool { return false } @@ -345,7 +346,7 @@ func TestIsDatabaseNotAccessibleFalse(t *testing.T) { func TestDisabledHealthCheckReturnsTrue(t *testing.T) { // Override the live connection function with a mock version that would always return false if called - ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte) bool { + ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte, objStoreConnectionTimeout time.Duration) bool { return false } @@ -385,7 +386,7 @@ func TestDisabledHealthCheckReturnsTrue(t *testing.T) { func TestIsDatabaseAccessibleBadAccessKey(t *testing.T) { // Override the live connection function with a mock version - ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte) bool { + ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte, objStoreConnectionTimeout time.Duration) bool { return true } @@ -423,7 +424,7 @@ func TestIsDatabaseAccessibleBadAccessKey(t *testing.T) { func TestIsDatabaseAccessibleBadSecretKey(t *testing.T) { // Override the live connection function with a mock version - ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte) bool { + ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte, objStoreConnectionTimeout time.Duration) bool { return true } diff --git a/controllers/suite_test.go b/controllers/suite_test.go index c8e9c2801..5697550f4 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -73,10 +73,10 @@ func TestAPIs(t *testing.T) { var _ = BeforeEach(func() { By("Overriding the Database and Object Store live connection functions with trivial stubs") - ConnectAndQueryDatabase = func(host string, port string, username string, password string, dbname string) bool { + ConnectAndQueryDatabase = func(host string, port string, username string, password string, dbname string, dbConnectionTimeout time.Duration) bool { return true } - ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte) bool { + ConnectAndQueryObjStore = func(ctx context.Context, log logr.Logger, endpoint, bucket string, accesskey, secretkey []byte, secure bool, pemCerts []byte, objStoreConnectionTimeout time.Duration) bool { return true } })