Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix tests to run locally and in CI and misc fixes #949

Merged
merged 12 commits into from
Feb 9, 2024
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ on: [push]
jobs:
ruby-tests:
runs-on: ubuntu-latest
env:
CI: true

name: "Tests (${{matrix.test_suite}}) - Ruby ${{ matrix.ruby }} with Kubernetes ${{ matrix.kubernetes_version }}"
strategy:
Expand Down
15 changes: 15 additions & 0 deletions test/helpers/test_provisioner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,25 @@ def prepare_pv(name, storage_class_name: nil)

private

# Polls until the 'default' ServiceAccount can be fetched from +namespace+.
#
# Makes up to 30 attempts and pauses one second after every unsuccessful one,
# whether the lookup raised Kubeclient::ResourceNotFoundError or returned a
# falsy value, so the retry budget really spans roughly 30 seconds.
#
# @param kubeclient [#get_service_account] client used to look up the account
# @param namespace [String] namespace expected to contain the account
# @return [nil] as soon as the service account is returned by the client
# @raise [RuntimeError] if the account is still unavailable after 30 attempts
def wait_for_default_service_account(kubeclient, namespace)
  30.times do
    begin
      return if kubeclient.get_service_account('default', namespace)
    rescue Kubeclient::ResourceNotFoundError
      # Not created yet; fall through to the shared pause below.
    end
    # Pause on every miss so a falsy (non-raising) response cannot turn the
    # loop into a tight spin that exhausts all attempts immediately.
    sleep(1)
  end
  raise "Default service account in #{namespace} not ready after 30 seconds"
end

# Creates the Namespace called +namespace+ through the kubeclient, then waits
# until its 'default' ServiceAccount can be fetched.
def create_namespace(namespace)
  resource = Kubeclient::Resource.new(kind: 'Namespace').tap do |res|
    res.metadata = { name: namespace }
  end
  kubeclient.create_namespace(resource)
  # The 'default' serviceaccount is created asynchronously after the namespace;
  # see https://github.com/kubernetes/kubernetes/issues/66689
  wait_for_default_service_account(kubeclient, namespace)
end
end
end
37 changes: 21 additions & 16 deletions test/integration/krane_deploy_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ def test_output_of_failed_unmanaged_pod
assert_logs_match_all([
"Failed to deploy 1 priority resource",
"Pod status: Failed.",
"no such file or directory",
*("no such file or directory" if ENV['CI'] == 'true'),
], in_order: true)
end

Expand Down Expand Up @@ -508,7 +508,9 @@ def test_unrunnable_container_on_deployment_pod_fails_quickly
"Logs from container 'successful-init'",
"Log from successful init container",
], in_order: true)
assert_logs_match("no such file or directory")
if ENV['CI'] == 'true'
assert_logs_match("no such file or directory")
end
end

def test_wait_false_still_waits_for_priority_resources
Expand Down Expand Up @@ -701,7 +703,10 @@ def test_deploy_result_logging_for_mixed_result_deploy
%r{Deployment/bad-probe: TIMED OUT \(progress deadline: \d+s\)},
"Timeout reason: ProgressDeadlineExceeded",
]
end_bad_probe_logs = ["Scaled up replica set bad-probe-"] # event

end_bad_probe_logs = [
*("Scaled up replica set bad-probe-" if ENV['CI'] == 'true') #event
]

# Debug info for bad probe timeout
assert_logs_match_all(start_bad_probe_logs + [
Expand All @@ -719,7 +724,7 @@ def test_deploy_result_logging_for_mixed_result_deploy
"Timeout reason: ProgressDeadlineExceeded",
/Latest ReplicaSet: missing-volumes-\w+/,
"Final status: 1 replica, 1 updatedReplica, 1 unavailableReplica",
/FailedMount.*secrets? "catphotoscom" not found/, # event
*(%r{.*FailedMount.*secret "catphotoscom" not found.*} if ENV['CI'] == 'true'), #event
], in_order: true)

# Debug info for failure
Expand All @@ -729,7 +734,7 @@ def test_deploy_result_logging_for_mixed_result_deploy
"The following containers are in a state that is unlikely to be recoverable:",
"init-crash-loop-back-off: Crashing repeatedly (exit 1). See logs for more information.",
"Final status: 1 replica, 1 updatedReplica, 1 unavailableReplica",
"Scaled up replica set init-crash-", # event
*("Scaled up replica set init-crash-" if ENV['CI'] == 'true'),
"this is a log from the crashing init container",
], in_order: true)

Expand Down Expand Up @@ -1113,8 +1118,8 @@ def test_bad_container_on_daemon_sets_fails
"DaemonSet/crash-loop: FAILED",
"crash-loop-back-off: Crashing repeatedly (exit 1). See logs for more information.",
"Final status: #{num_ds} updatedNumberScheduled, #{num_ds} desiredNumberScheduled, 0 numberReady",
"Events (common success events excluded):",
"BackOff: Back-off restarting failed container",
*("Events (common success events excluded):" if ENV['CI'] == 'true'),
*("BackOff: Back-off restarting failed container" if ENV['CI'] == 'true'),
"Logs from container 'crash-loop-back-off':",
"this is a log from the crashing container",
], in_order: true)
Expand All @@ -1134,8 +1139,8 @@ def test_bad_container_on_stateful_sets_fails_with_rolling_update
"Successfully deployed 1 resource and failed to deploy 1 resource",
"StatefulSet/stateful-busybox: FAILED",
"app: Crashing repeatedly (exit 1). See logs for more information.",
"Events (common success events excluded):",
%r{\[Pod/stateful-busybox-\d\]\tBackOff: Back-off restarting failed container},
*("Events (common success events excluded):" if ENV['CI'] == 'true'), # event
*(%r{\[Pod/stateful-busybox-\d\]\tBackOff: Back-off restarting failed container} if ENV['CI'] == 'true'),
"Logs from container 'app':",
"ls: /not-a-dir: No such file or directory",
], in_order: true)
Expand Down Expand Up @@ -1182,7 +1187,7 @@ def test_resource_quotas_are_deployed_first
"ResourceQuota/resource-quotas",
%r{Deployment/web: TIMED OUT \(progress deadline: \d+s\)},
"Timeout reason: ProgressDeadlineExceeded",
"failed quota: resource-quotas", # from an event
*("failed quota: resource-quotas" if ENV['CI'] == 'true'), # from an event
], in_order: true)

rqs = kubeclient.get_resource_quotas(namespace: @namespace)
Expand Down Expand Up @@ -1330,7 +1335,7 @@ def test_jobs_can_fail
"Result: FAILURE",
"Job/hello-job: FAILED",
"Final status: Failed",
%r{\[Job/hello-job\]\tDeadlineExceeded: Job was active longer than specified deadline \(\d+ events\)},
*(%r{\[Job/hello-job\]\tDeadlineExceeded: Job was active longer than specified deadline \(\d+ events\)} if ENV['CI'] == 'true'),
])
end

Expand All @@ -1343,19 +1348,19 @@ def test_resource_watcher_reports_failed_after_timeout
bad_probe = f["bad_probe.yml"]["Deployment"].first
bad_probe["spec"]["progressDeadlineSeconds"] = 5
f["missing_volumes.yml"]["Deployment"].first["spec"]["progressDeadlineSeconds"] = 30
f["cannot_run.yml"]["Deployment"].first["spec"]["replicas"] = 1
f["cannot_run.yml"]["Deployment"].first["spec"]["replicas"] = 1 #this results in pods in CrashLoopBackOff
end
assert_deploy_failure(result)
assert_deploy_failure_or_timeout(result)

bad_probe_timeout = "Deployment/bad-probe: TIMED OUT (progress deadline: 5s)"

assert_logs_match_all([
"Successfully deployed 1 resource, timed out waiting for 2 resources to deploy, and failed to deploy 1 resource",
/Successfully deployed 1 resource(,| and) timed out waiting for/,
"Successful resources",
"ConfigMap/test",
"Deployment/cannot-run: FAILED",
bad_probe_timeout,
"Deployment/missing-volumes: GLOBAL WATCH TIMEOUT (20 seconds)",
/(Continuing to wait for:.*Deployment\/cannot-run.*)|(Deployment\/cannot-run: FAILED)/,
/(Continuing to wait for:.*Deployment\/missing-volumes.*)|(Deployment\/missing-volumes: GLOBAL WATCH TIMEOUT \(20 seconds\))/,
])
end

Expand Down
4 changes: 2 additions & 2 deletions test/integration/restart_task_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_restart_statefulset_on_delete_restarts_child_pods
"Waiting for rollout",
"Result: SUCCESS",
"Successfully restarted 1 resource",
%r{StatefulSet/stateful-busybox.* 2 replicas},
%r{StatefulSet/stateful-busybox.* (2 replicas|1 replica, 1 currentReplica)},
],
in_order: true)
end
Expand Down Expand Up @@ -291,7 +291,7 @@ def test_restart_failure
"The following containers have not passed their readiness probes",
"app must exit 0 from the following command",
"Final status: 2 replicas, 1 updatedReplica, 1 availableReplica, 1 unavailableReplica",
"Unhealthy: Readiness probe failed",
*("Unhealthy: Readiness probe failed" if ENV['CI'] == 'true'),
],
in_order: true)
end
Expand Down
8 changes: 8 additions & 0 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,14 @@ def assert_deploy_failure(result, cause = nil)
alias_method :assert_restart_failure, :assert_deploy_failure
alias_method :assert_task_run_failure, :assert_deploy_failure

# Asserts that +result+ is false and that the logs yielded by
# +logging_assertion+ contain either a failure or a timeout result line.
def assert_deploy_failure_or_timeout(result)
  assert_equal(false, result, "Deploy succeeded when it was expected to fail.#{logs_message_if_captured}")
  logging_assertion do |logs|
    # Either outcome is acceptable; which one appears is not pinned here.
    outcome_logged = ["Result: FAILURE", "Result: TIMED OUT"].any? { |marker| logs.include?(marker) }
    assert(outcome_logged,
      "'Result: FAILURE' or 'Result: TIMED OUT' not found in the following logs:\n#{logs}")
  end
end

def assert_deploy_success(result)
assert_equal(true, result, "Deploy failed when it was expected to succeed.#{logs_message_if_captured}")
logging_assertion do |logs|
Expand Down
Loading