From 3fb816d56855117ea69d5980a4467d221fd7cea1 Mon Sep 17 00:00:00 2001
From: fabriziopandini
Date: Thu, 21 Nov 2024 17:59:13 +0100
Subject: [PATCH] Add v1beta2 RollingOut condition

---
 api/v1beta1/cluster_types.go                  |  23 +++
 api/v1beta1/machinedeployment_types.go        |  15 ++
 api/v1beta1/v1beta2_condition_consts.go       |  13 ++
 .../api/v1beta1/v1beta2_condition_consts.go   |  15 ++
 .../internal/controllers/controller.go        |   1 +
 .../kubeadm/internal/controllers/status.go    |  58 ++++++
 .../internal/controllers/status_test.go       |  93 +++++++++
 .../controllers/cluster/cluster_controller.go |   1 +
 .../cluster/cluster_controller_status.go      |  72 +++++++
 .../cluster/cluster_controller_status_test.go | 195 ++++++++++++++++++
 .../machinedeployment_controller.go           |   1 +
 .../machinedeployment_status.go               |  69 +++++++
 .../machinedeployment_status_test.go          | 103 +++++++++
 util/conditions/v1beta2/sort.go               |   2 +
 14 files changed, 661 insertions(+)

diff --git a/api/v1beta1/cluster_types.go b/api/v1beta1/cluster_types.go
index b9cbe1b98170..0c90be618820 100644
--- a/api/v1beta1/cluster_types.go
+++ b/api/v1beta1/cluster_types.go
@@ -281,6 +281,29 @@ const (
 	ClusterRemoteConnectionProbeSucceededV1Beta2Reason = "ProbeSucceeded"
 )
 
+// Cluster's RollingOut condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
+	// ClusterRollingOutV1Beta2Condition is the summary of `RollingOut` conditions from ControlPlane, MachineDeployments
+	// and MachinePools.
+	ClusterRollingOutV1Beta2Condition = RollingOutV1Beta2Condition
+
+	// ClusterRollingOutV1Beta2Reason surfaces when at least one of the Cluster's control plane, MachineDeployments,
+	// or MachinePools is rolling out.
+	ClusterRollingOutV1Beta2Reason = RollingOutV1Beta2Reason
+
+	// ClusterNotRollingOutV1Beta2Reason surfaces when none of the Cluster's control plane, MachineDeployments,
+	// or MachinePools are rolling out.
+	ClusterNotRollingOutV1Beta2Reason = NotRollingOutV1Beta2Reason
+
+	// ClusterRollingOutUnknownV1Beta2Reason surfaces when the RollingOut condition of at least one of the Cluster's
+	// control plane, MachineDeployments, or MachinePools is unknown, and none of them is true.
+	ClusterRollingOutUnknownV1Beta2Reason = "RollingOutUnknown"
+
+	// ClusterRollingOutInternalErrorV1Beta2Reason surfaces unexpected failures when listing machines
+	// or computing the RollingOut condition.
+	ClusterRollingOutInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
+)
+
 // Cluster's ScalingUp condition and corresponding reasons that will be used in v1Beta2 API version.
 const (
 	// ClusterScalingUpV1Beta2Condition is the summary of `ScalingUp` conditions from ControlPlane, MachineDeployments,
diff --git a/api/v1beta1/machinedeployment_types.go b/api/v1beta1/machinedeployment_types.go
index 9f0a5856f6a2..a6a0031c0e21 100644
--- a/api/v1beta1/machinedeployment_types.go
+++ b/api/v1beta1/machinedeployment_types.go
@@ -148,6 +148,21 @@ const (
 	MachineDeploymentMachinesUpToDateInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
 )
 
+// MachineDeployment's RollingOut condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
+	// MachineDeploymentRollingOutV1Beta2Condition is true if there is at least one machine not up-to-date.
+	MachineDeploymentRollingOutV1Beta2Condition = RollingOutV1Beta2Condition
+
+	// MachineDeploymentRollingOutV1Beta2Reason surfaces when there is at least one machine not up-to-date.
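+	// Note: a machine is considered not up-to-date when its UpToDate condition is false.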
+	MachineDeploymentRollingOutV1Beta2Reason = RollingOutV1Beta2Reason
+
+	// MachineDeploymentNotRollingOutV1Beta2Reason surfaces when all the machines are up-to-date.
+	MachineDeploymentNotRollingOutV1Beta2Reason = NotRollingOutV1Beta2Reason
+
+	// MachineDeploymentRollingOutInternalErrorV1Beta2Reason surfaces unexpected failures when listing machines.
+	MachineDeploymentRollingOutInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
+)
+
 // MachineDeployment's ScalingUp condition and corresponding reasons that will be used in v1Beta2 API version.
 const (
 	// MachineDeploymentScalingUpV1Beta2Condition is true if actual replicas < desired replicas.
diff --git a/api/v1beta1/v1beta2_condition_consts.go b/api/v1beta1/v1beta2_condition_consts.go
index f30b00dac6c3..b9e36987a8a6 100644
--- a/api/v1beta1/v1beta2_condition_consts.go
+++ b/api/v1beta1/v1beta2_condition_consts.go
@@ -54,6 +54,13 @@ const (
 	// the same condition type exists.
 	MachinesUpToDateV1Beta2Condition = "MachinesUpToDate"
 
+	// RollingOutV1Beta2Condition reports if an object is rolling out changes to machines; Cluster API usually
+	// rolls out changes to machines by replacing not up-to-date machines with new ones.
+	// Note: This condition type is defined to ensure consistent naming of conditions across objects.
+	// Please use object specific variants of this condition which provide more details for each context where
+	// the same condition type exists.
+	RollingOutV1Beta2Condition = "RollingOut"
+
 	// ScalingUpV1Beta2Condition reports if an object is scaling up.
 	// Note: This condition type is defined to ensure consistent naming of conditions across objects.
 	// Please use object specific variants of this condition which provides more details for each context where
 	// the same condition type exists.
@@ -114,6 +121,12 @@ const (
 	// UpToDateUnknownV1Beta2Reason applies to a condition surfacing object up-tp-date unknown.
 	UpToDateUnknownV1Beta2Reason = "UpToDateUnknown"
 
+	// RollingOutV1Beta2Reason surfaces when an object is rolling out.
+	RollingOutV1Beta2Reason = "RollingOut"
+
+	// NotRollingOutV1Beta2Reason surfaces when an object is not rolling out.
+	NotRollingOutV1Beta2Reason = "NotRollingOut"
+
 	// ScalingUpV1Beta2Reason surfaces when an object is scaling up.
 	ScalingUpV1Beta2Reason = "ScalingUp"
diff --git a/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go b/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go
index 84bd7adacedb..551acd60b78f 100644
--- a/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go
+++ b/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go
@@ -176,6 +176,21 @@ const (
 	KubeadmControlPlaneMachinesUpToDateInternalErrorV1Beta2Reason = clusterv1.InternalErrorV1Beta2Reason
 )
 
+// KubeadmControlPlane's RollingOut condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
+	// KubeadmControlPlaneRollingOutV1Beta2Condition is true if there is at least one machine not up-to-date.
+	KubeadmControlPlaneRollingOutV1Beta2Condition = clusterv1.RollingOutV1Beta2Condition
+
+	// KubeadmControlPlaneRollingOutV1Beta2Reason surfaces when there is at least one machine not up-to-date.
+	KubeadmControlPlaneRollingOutV1Beta2Reason = clusterv1.RollingOutV1Beta2Reason
+
+	// KubeadmControlPlaneNotRollingOutV1Beta2Reason surfaces when all the machines are up-to-date.
+	KubeadmControlPlaneNotRollingOutV1Beta2Reason = clusterv1.NotRollingOutV1Beta2Reason
+
+	// KubeadmControlPlaneRollingOutInternalErrorV1Beta2Reason surfaces unexpected failures when listing machines.
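+	// Note: this reuses the generic clusterv1.InternalErrorV1Beta2Reason so the reason stays consistent across objects.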
+	KubeadmControlPlaneRollingOutInternalErrorV1Beta2Reason = clusterv1.InternalErrorV1Beta2Reason
+)
+
 // KubeadmControlPlane's ScalingUp condition and corresponding reasons that will be used in v1Beta2 API version.
 const (
 	// KubeadmControlPlaneScalingUpV1Beta2Condition is true if actual replicas < desired replicas.
diff --git a/controlplane/kubeadm/internal/controllers/controller.go b/controlplane/kubeadm/internal/controllers/controller.go
index 0dc9bfe90d95..c1b4710e4331 100644
--- a/controlplane/kubeadm/internal/controllers/controller.go
+++ b/controlplane/kubeadm/internal/controllers/controller.go
@@ -353,6 +353,7 @@ func patchKubeadmControlPlane(ctx context.Context, patchHelper *patch.Helper, kc
 		controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
 		controlplanev1.KubeadmControlPlaneMachinesReadyV1Beta2Condition,
 		controlplanev1.KubeadmControlPlaneMachinesUpToDateV1Beta2Condition,
+		controlplanev1.KubeadmControlPlaneRollingOutV1Beta2Condition,
 		controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
 		controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
 		controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Condition,
diff --git a/controlplane/kubeadm/internal/controllers/status.go b/controlplane/kubeadm/internal/controllers/status.go
index da51e4da5458..cc752d777bca 100644
--- a/controlplane/kubeadm/internal/controllers/status.go
+++ b/controlplane/kubeadm/internal/controllers/status.go
@@ -161,6 +161,7 @@ func (r *KubeadmControlPlaneReconciler) updateV1Beta2Status(ctx context.Context,
 	setReplicas(ctx, controlPlane.KCP, controlPlane.Machines)
 	setInitializedCondition(ctx, controlPlane.KCP)
+	setRollingOutCondition(ctx, controlPlane.KCP, controlPlane.Machines)
 	setScalingUpCondition(ctx, controlPlane.KCP, controlPlane.Machines, controlPlane.InfraMachineTemplateIsNotFound, controlPlane.PreflightCheckResults)
 	setScalingDownCondition(ctx, controlPlane.KCP, controlPlane.Machines, controlPlane.PreflightCheckResults)
 	setMachinesReadyCondition(ctx, controlPlane.KCP, controlPlane.Machines)
@@ -210,6 +211,63 @@ func setInitializedCondition(_ context.Context, kcp *controlplanev1.KubeadmContr
 	})
 }
 
+func setRollingOutCondition(_ context.Context, kcp *controlplanev1.KubeadmControlPlane, machines collections.Machines) {
+	// Count machines rolling out and collect reasons why a rollout is happening.
+	// Note: The code below collects all the reasons for which at least one machine is rolling out; under normal
+	// circumstances all the machines are rolling out for the same reasons. However, if KCP is changed again
+	// before a previous change has been fully rolled out, there could be machines rolling out for
+	// different reasons.
+	rollingOutReplicas := 0
+	rolloutReasons := sets.Set[string]{}
+	for _, machine := range machines {
+		upToDateCondition := v1beta2conditions.Get(machine, clusterv1.MachineUpToDateV1Beta2Condition)
+		if upToDateCondition == nil || upToDateCondition.Status != metav1.ConditionFalse {
+			continue
+		}
+		rollingOutReplicas++
+		if upToDateCondition.Message != "" {
+			rolloutReasons.Insert(strings.Split(upToDateCondition.Message, "; ")...)
+		}
+	}
+
+	if rollingOutReplicas == 0 {
+		var message string
+		v1beta2conditions.Set(kcp, metav1.Condition{
+			Type:    controlplanev1.KubeadmControlPlaneRollingOutV1Beta2Condition,
+			Status:  metav1.ConditionFalse,
+			Reason:  controlplanev1.KubeadmControlPlaneNotRollingOutV1Beta2Reason,
+			Message: message,
+		})
+		return
+	}
+
+	// Rolling out.
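+	// Build a message of the form "Rolling out N not up-to-date replicas", followed by one bullet line per rollout reason.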
+	message := fmt.Sprintf("Rolling out %d not up-to-date replicas", rollingOutReplicas)
+	if rolloutReasons.Len() > 0 {
+		// Surface rollout reasons ensuring that if there is a version change, it goes first.
+		reasons := rolloutReasons.UnsortedList()
+		sort.Slice(reasons, func(i, j int) bool {
+			if strings.HasPrefix(reasons[i], "Version") && !strings.HasPrefix(reasons[j], "Version") {
+				return true
+			}
+			if !strings.HasPrefix(reasons[i], "Version") && strings.HasPrefix(reasons[j], "Version") {
+				return false
+			}
+			return reasons[i] < reasons[j]
+		})
+		for i := range reasons {
+			reasons[i] = fmt.Sprintf("* %s", reasons[i])
+		}
+		message += fmt.Sprintf("\n%s", strings.Join(reasons, "\n"))
+	}
+	v1beta2conditions.Set(kcp, metav1.Condition{
+		Type:    controlplanev1.KubeadmControlPlaneRollingOutV1Beta2Condition,
+		Status:  metav1.ConditionTrue,
+		Reason:  controlplanev1.KubeadmControlPlaneRollingOutV1Beta2Reason,
+		Message: message,
+	})
+}
+
 func setScalingUpCondition(_ context.Context, kcp *controlplanev1.KubeadmControlPlane, machines collections.Machines, infrastructureObjectNotFound bool, preflightChecks internal.PreflightCheckResults) {
 	if kcp.Spec.Replicas == nil {
 		v1beta2conditions.Set(kcp, metav1.Condition{
diff --git a/controlplane/kubeadm/internal/controllers/status_test.go b/controlplane/kubeadm/internal/controllers/status_test.go
index 289a2bbe8580..7a154b0b5cfa 100644
--- a/controlplane/kubeadm/internal/controllers/status_test.go
+++ b/controlplane/kubeadm/internal/controllers/status_test.go
@@ -121,6 +121,99 @@ func Test_setInitializedCondition(t *testing.T) {
 	}
 }
 
+func Test_setRollingOutCondition(t *testing.T) {
+	upToDateCondition := metav1.Condition{
+		Type:   clusterv1.MachineUpToDateV1Beta2Condition,
+		Status: metav1.ConditionTrue,
+		Reason: clusterv1.MachineUpToDateV1Beta2Reason,
+	}
+
+	tests := []struct {
+		name            string
+		kcp             *controlplanev1.KubeadmControlPlane
+		machines        []*clusterv1.Machine
+		expectCondition metav1.Condition
+	}{
+		{
+			name:     "no machines",
+			kcp:      &controlplanev1.KubeadmControlPlane{},
+			machines: []*clusterv1.Machine{},
+			expectCondition: metav1.Condition{
+				Type:   controlplanev1.KubeadmControlPlaneRollingOutV1Beta2Condition,
+				Status: metav1.ConditionFalse,
+				Reason: controlplanev1.KubeadmControlPlaneNotRollingOutV1Beta2Reason,
+			},
+		},
+		{
+			name: "all machines are up to date",
+			kcp:  &controlplanev1.KubeadmControlPlane{},
+			machines: []*clusterv1.Machine{
+				{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{upToDateCondition}}}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{upToDateCondition}}}},
+			},
+			expectCondition: metav1.Condition{
+				Type:   controlplanev1.KubeadmControlPlaneRollingOutV1Beta2Condition,
+				Status: metav1.ConditionFalse,
+				Reason: controlplanev1.KubeadmControlPlaneNotRollingOutV1Beta2Reason,
+			},
+		},
+		{
+			name: "one up-to-date, two not up-to-date, one reporting up-to-date unknown",
+			kcp:  &controlplanev1.KubeadmControlPlane{},
+			machines: []*clusterv1.Machine{
+				{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{upToDateCondition}}}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{
+					{
+						Type:   clusterv1.MachineUpToDateV1Beta2Condition,
+						Status: metav1.ConditionUnknown,
+						Reason: clusterv1.InternalErrorV1Beta2Reason,
+					},
+				}}}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{
+					{
+						Type:    clusterv1.MachineUpToDateV1Beta2Condition,
+						Status:  metav1.ConditionFalse,
+						Reason:  clusterv1.MachineNotUpToDateV1Beta2Reason,
+						Message: "Version v1.25.0, v1.26.0 required",
+					},
+				}}}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "m4"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{
+					{
+						Type:    clusterv1.MachineUpToDateV1Beta2Condition,
+						Status:  metav1.ConditionFalse,
+						Reason:  clusterv1.MachineNotUpToDateV1Beta2Reason,
+						Message: "Failure domain failure-domain1, failure-domain2 required; InfrastructureMachine is not up-to-date",
+					},
+				}}}},
+			},
+			expectCondition: metav1.Condition{
+				Type:   controlplanev1.KubeadmControlPlaneRollingOutV1Beta2Condition,
+				Status: metav1.ConditionTrue,
+				Reason: controlplanev1.KubeadmControlPlaneRollingOutV1Beta2Reason,
+				Message: "Rolling out 2 not up-to-date replicas\n" +
+					"* Version v1.25.0, v1.26.0 required\n" +
+					"* Failure domain failure-domain1, failure-domain2 required\n" +
+					"* InfrastructureMachine is not up-to-date",
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			g := NewWithT(t)
+
+			var machines collections.Machines
+			if tt.machines != nil {
+				machines = collections.FromMachines(tt.machines...)
+			}
+			setRollingOutCondition(ctx, tt.kcp, machines)
+
+			condition := v1beta2conditions.Get(tt.kcp, controlplanev1.KubeadmControlPlaneRollingOutV1Beta2Condition)
+			g.Expect(condition).ToNot(BeNil())
+			g.Expect(*condition).To(v1beta2conditions.MatchCondition(tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true)))
+		})
+	}
+}
+
 func Test_setScalingUpCondition(t *testing.T) {
 	tests := []struct {
 		name string
diff --git a/internal/controllers/cluster/cluster_controller.go b/internal/controllers/cluster/cluster_controller.go
index 62c19e4d738c..e30414d94ce2 100644
--- a/internal/controllers/cluster/cluster_controller.go
+++ b/internal/controllers/cluster/cluster_controller.go
@@ -265,6 +265,7 @@ func patchCluster(ctx context.Context, patchHelper *patch.Helper, cluster *clust
 		clusterv1.ClusterMachinesReadyV1Beta2Condition,
 		clusterv1.ClusterMachinesUpToDateV1Beta2Condition,
 		clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition,
+		clusterv1.ClusterRollingOutV1Beta2Condition,
 		clusterv1.ClusterScalingUpV1Beta2Condition,
 		clusterv1.ClusterScalingDownV1Beta2Condition,
 		clusterv1.ClusterRemediatingV1Beta2Condition,
diff --git a/internal/controllers/cluster/cluster_controller_status.go b/internal/controllers/cluster/cluster_controller_status.go
index 59965ccd273e..46d250c6f545 100644
--- a/internal/controllers/cluster/cluster_controller_status.go
+++ b/internal/controllers/cluster/cluster_controller_status.go
@@ -67,6 +67,7 @@ func (r *Reconciler) updateStatus(ctx context.Context, s *scope) {
 	setWorkersAvailableCondition(ctx, s.cluster, expv1.MachinePoolList{}, s.descendants.machineDeployments, s.getDescendantsSucceeded)
 	setMachinesReadyCondition(ctx, s.cluster, allMachines, s.getDescendantsSucceeded)
 	setMachinesUpToDateCondition(ctx, s.cluster, allMachines, s.getDescendantsSucceeded)
+	setRollingOutCondition(ctx, s.cluster, s.controlPlane, expv1.MachinePoolList{}, s.descendants.machineDeployments, s.controlPlaneIsNotFound, s.getDescendantsSucceeded)
 	setScalingUpCondition(ctx, s.cluster, s.controlPlane, expv1.MachinePoolList{}, s.descendants.machineDeployments, s.descendants.machineSets, s.controlPlaneIsNotFound, s.getDescendantsSucceeded)
 	setScalingDownCondition(ctx, s.cluster, s.controlPlane, expv1.MachinePoolList{}, s.descendants.machineDeployments, s.descendants.machineSets, s.controlPlaneIsNotFound, s.getDescendantsSucceeded)
 	setRemediatingCondition(ctx, s.cluster, machinesToBeRemediated, unhealthyMachines, s.getDescendantsSucceeded)
@@ -760,6 +761,77 @@ func setRemediatingCondition(ctx context.Context, cluster *clusterv1.Cluster, ma
 	})
 }
 
+func setRollingOutCondition(ctx context.Context, cluster *clusterv1.Cluster, controlPlane *unstructured.Unstructured, machinePools expv1.MachinePoolList, machineDeployments clusterv1.MachineDeploymentList, controlPlaneIsNotFound bool, getDescendantsSucceeded bool) {
+	log := ctrl.LoggerFrom(ctx)
+
+	// If there were unexpected errors in getting the control plane or listing descendants (this should never happen), surface them.
+	if (cluster.Spec.ControlPlaneRef != nil && controlPlane == nil && !controlPlaneIsNotFound) || !getDescendantsSucceeded {
+		v1beta2conditions.Set(cluster, metav1.Condition{
+			Type:    clusterv1.ClusterRollingOutV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  clusterv1.ClusterRollingOutInternalErrorV1Beta2Reason,
+			Message: "Please check controller logs for errors",
+		})
+		return
+	}
+
+	if controlPlane == nil && len(machinePools.Items)+len(machineDeployments.Items) == 0 {
+		v1beta2conditions.Set(cluster, metav1.Condition{
+			Type:   clusterv1.ClusterRollingOutV1Beta2Condition,
+			Status: metav1.ConditionFalse,
+			Reason: clusterv1.ClusterNotRollingOutV1Beta2Reason,
+		})
+		return
+	}
+
+	ws := make([]aggregationWrapper, 0, len(machinePools.Items)+len(machineDeployments.Items)+1)
+	if controlPlane != nil {
+		// The control plane is considered only if it is reporting the condition (the contract does not require conditions to be reported).
+		// Note: this implies that it won't surface as "Conditions RollingOut not yet reported from ...".
+		if c, err := v1beta2conditions.UnstructuredGet(controlPlane, clusterv1.RollingOutV1Beta2Condition); err == nil && c != nil {
+			ws = append(ws, aggregationWrapper{cp: controlPlane})
+		}
+	}
+	for _, mp := range machinePools.Items {
+		ws = append(ws, aggregationWrapper{mp: &mp})
+	}
+	for _, md := range machineDeployments.Items {
+		ws = append(ws, aggregationWrapper{md: &md})
+	}
+
+	rollingOutCondition, err := v1beta2conditions.NewAggregateCondition(
+		ws, clusterv1.RollingOutV1Beta2Condition,
+		v1beta2conditions.TargetConditionType(clusterv1.ClusterRollingOutV1Beta2Condition),
+		// Instruct aggregate to consider RollingOut condition with negative polarity.
+		v1beta2conditions.NegativePolarityConditionTypes{clusterv1.RollingOutV1Beta2Condition},
+		// Using a custom merge strategy to override reasons applied during merge and to ensure merge
+		// takes into account the fact that RollingOut has negative polarity.
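+		// Note: with negative polarity, status True (rolling out) is the noteworthy state the merge must surface, inverting the default handling of True/False.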
+		v1beta2conditions.CustomMergeStrategy{
+			MergeStrategy: v1beta2conditions.DefaultMergeStrategy(
+				v1beta2conditions.TargetConditionHasPositivePolarity(false),
+				v1beta2conditions.ComputeReasonFunc(v1beta2conditions.GetDefaultComputeMergeReasonFunc(
+					clusterv1.ClusterRollingOutV1Beta2Reason,
+					clusterv1.ClusterRollingOutUnknownV1Beta2Reason,
+					clusterv1.ClusterNotRollingOutV1Beta2Reason,
+				)),
+				v1beta2conditions.GetPriorityFunc(v1beta2conditions.GetDefaultMergePriorityFunc(clusterv1.RollingOutV1Beta2Condition)),
+			),
+		},
+	)
+	if err != nil {
+		log.Error(err, "Failed to aggregate ControlPlane, MachinePool, MachineDeployment's RollingOut conditions")
+		v1beta2conditions.Set(cluster, metav1.Condition{
+			Type:    clusterv1.ClusterRollingOutV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  clusterv1.ClusterRollingOutInternalErrorV1Beta2Reason,
+			Message: "Please check controller logs for errors",
+		})
+		return
+	}
+
+	v1beta2conditions.Set(cluster, *rollingOutCondition)
+}
+
 func setScalingUpCondition(ctx context.Context, cluster *clusterv1.Cluster, controlPlane *unstructured.Unstructured, machinePools expv1.MachinePoolList, machineDeployments clusterv1.MachineDeploymentList, machineSets clusterv1.MachineSetList, controlPlaneIsNotFound bool, getDescendantsSucceeded bool) {
 	log := ctrl.LoggerFrom(ctx)
 
diff --git a/internal/controllers/cluster/cluster_controller_status_test.go b/internal/controllers/cluster/cluster_controller_status_test.go
index 1956d22659b3..c4c525b045b1 100644
--- a/internal/controllers/cluster/cluster_controller_status_test.go
+++ b/internal/controllers/cluster/cluster_controller_status_test.go
@@ -1075,6 +1075,201 @@ func TestSetMachinesUpToDateCondition(t *testing.T) {
 	}
 }
 
+func TestSetRollingOutCondition(t *testing.T) {
+	tests := []struct {
+		name                    string
+		cluster                 *clusterv1.Cluster
+		controlPlane            *unstructured.Unstructured
+		controlPlaneIsNotFound  bool
+		machinePools            expv1.MachinePoolList
+		machineDeployments      clusterv1.MachineDeploymentList
+		getDescendantsSucceeded bool
+		expectCondition         metav1.Condition
+	}{
+		{
+			name:                    "cluster with controlplane, unknown if failed to get descendants",
+			cluster:                 fakeCluster("c", controlPlaneRef{}),
+			controlPlane:            nil,
+			controlPlaneIsNotFound:  true,
+			getDescendantsSucceeded: false,
+			expectCondition: metav1.Condition{
+				Type:    clusterv1.ClusterRollingOutV1Beta2Condition,
+				Status:  metav1.ConditionUnknown,
+				Reason:  clusterv1.ClusterRollingOutInternalErrorV1Beta2Reason,
+				Message: "Please check controller logs for errors",
+			},
+		},
+		{
+			name:                    "cluster with controlplane, unknown if failed to get control plane",
+			cluster:                 fakeCluster("c", controlPlaneRef{}),
+			controlPlane:            nil,
+			controlPlaneIsNotFound:  false,
+			getDescendantsSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:    clusterv1.ClusterRollingOutV1Beta2Condition,
+				Status:  metav1.ConditionUnknown,
+				Reason:  clusterv1.ClusterRollingOutInternalErrorV1Beta2Reason,
+				Message: "Please check controller logs for errors",
+			},
+		},
+		{
+			name:                    "cluster with controlplane, false if no control plane & descendants",
+			cluster:                 fakeCluster("c", controlPlaneRef{}),
+			controlPlaneIsNotFound:  true,
+			getDescendantsSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.ClusterRollingOutV1Beta2Condition,
+				Status: metav1.ConditionFalse,
+				Reason: clusterv1.ClusterNotRollingOutV1Beta2Reason,
+			},
+		},
+		{
+			name:         "cluster with controlplane, control plane & descendants do not report rolling out",
+			cluster:      fakeCluster("c", controlPlaneRef{}),
+			controlPlane: fakeControlPlane("cp1"),
+			machinePools: expv1.MachinePoolList{Items: []expv1.MachinePool{
+				*fakeMachinePool("mp1"),
+			}},
+			machineDeployments: clusterv1.MachineDeploymentList{Items: []clusterv1.MachineDeployment{
+				*fakeMachineDeployment("md1"),
+			}},
+			getDescendantsSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.ClusterRollingOutV1Beta2Condition,
+				Status: metav1.ConditionUnknown,
+				Reason: clusterv1.ClusterRollingOutUnknownV1Beta2Reason,
+				Message: "* MachineDeployment md1: Condition RollingOut not yet reported\n" +
+					"* MachinePool mp1: Condition RollingOut not yet reported",
+			},
+		},
+		{
+			name:    "cluster with controlplane, control plane and descendants report rolling out",
+			cluster: fakeCluster("c", controlPlaneRef{}),
+			controlPlane: fakeControlPlane("cp1", condition{
+				Type:    clusterv1.ClusterRollingOutV1Beta2Condition,
+				Status:  corev1.ConditionTrue,
+				Reason:  clusterv1.RollingOutV1Beta2Reason,
+				Message: "Rolling out 3 not up-to-date replicas",
+			}),
+			machinePools: expv1.MachinePoolList{Items: []expv1.MachinePool{
+				*fakeMachinePool("mp1", v1beta2Condition{
+					Type:    clusterv1.RollingOutV1Beta2Condition,
+					Status:  metav1.ConditionTrue,
+					Reason:  clusterv1.RollingOutV1Beta2Reason,
+					Message: "Rolling out 5 not up-to-date replicas",
+				}),
+			}},
+			machineDeployments: clusterv1.MachineDeploymentList{Items: []clusterv1.MachineDeployment{
+				*fakeMachineDeployment("md1", v1beta2Condition{
+					Type:    clusterv1.MachineDeploymentRollingOutV1Beta2Condition,
+					Status:  metav1.ConditionTrue,
+					Reason:  clusterv1.MachineDeploymentRollingOutV1Beta2Reason,
+					Message: "Rolling out 4 not up-to-date replicas",
+				}),
+			}},
+			getDescendantsSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.ClusterRollingOutV1Beta2Condition,
+				Status: metav1.ConditionTrue,
+				Reason: clusterv1.ClusterRollingOutV1Beta2Reason,
+				Message: "* FakeControlPlane cp1: Rolling out 3 not up-to-date replicas\n" +
+					"* MachineDeployment md1: Rolling out 4 not up-to-date replicas\n" +
+					"* MachinePool mp1: Rolling out 5 not up-to-date replicas",
+			},
+		},
+		{
+			name:         "cluster with controlplane, control plane not reporting conditions, descendants report rolling out",
+			cluster:      fakeCluster("c", controlPlaneRef{}),
+			controlPlane: fakeControlPlane("cp1"),
+			machinePools: expv1.MachinePoolList{Items: []expv1.MachinePool{
+				*fakeMachinePool("mp1", v1beta2Condition{
+					Type:    clusterv1.RollingOutV1Beta2Condition,
+					Status:  metav1.ConditionTrue,
+					Reason:  clusterv1.RollingOutV1Beta2Reason,
+					Message: "Rolling out 3 not up-to-date replicas",
+				}),
+			}},
+			machineDeployments: clusterv1.MachineDeploymentList{Items: []clusterv1.MachineDeployment{
+				*fakeMachineDeployment("md1", v1beta2Condition{
+					Type:    clusterv1.MachineDeploymentRollingOutV1Beta2Condition,
+					Status:  metav1.ConditionTrue,
+					Reason:  clusterv1.MachineDeploymentRollingOutV1Beta2Reason,
+					Message: "Rolling out 5 not up-to-date replicas",
+				}),
+			}},
+			getDescendantsSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.ClusterRollingOutV1Beta2Condition,
+				Status: metav1.ConditionTrue,
+				Reason: clusterv1.ClusterRollingOutV1Beta2Reason,
+				Message: "* MachineDeployment md1: Rolling out 5 not up-to-date replicas\n" +
+					"* MachinePool mp1: Rolling out 3 not up-to-date replicas",
+			},
+		},
+		{
+			name:                    "cluster without controlplane, unknown if failed to get descendants",
+			cluster:                 fakeCluster("c"),
+			getDescendantsSucceeded: false,
+			expectCondition: metav1.Condition{
+				Type:    clusterv1.ClusterRollingOutV1Beta2Condition,
+				Status:  metav1.ConditionUnknown,
+				Reason:  clusterv1.ClusterRollingOutInternalErrorV1Beta2Reason,
+				Message: "Please check controller logs for errors",
+			},
+		},
+		{
+			name:                    "cluster without controlplane, false if no descendants",
+			cluster:                 fakeCluster("c"),
+			controlPlaneIsNotFound:  true,
+			getDescendantsSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.ClusterRollingOutV1Beta2Condition,
+				Status: metav1.ConditionFalse,
+				Reason: clusterv1.ClusterNotRollingOutV1Beta2Reason,
+			},
+		},
+		{
+			name:    "cluster without controlplane, descendants report rolling out",
+			cluster: fakeCluster("c"),
+			machinePools: expv1.MachinePoolList{Items: []expv1.MachinePool{
+				*fakeMachinePool("mp1", v1beta2Condition{
+					Type:    clusterv1.RollingOutV1Beta2Condition,
+					Status:  metav1.ConditionTrue,
+					Reason:  clusterv1.RollingOutV1Beta2Reason,
+					Message: "Rolling out 3 not up-to-date replicas",
+				}),
+			}},
+			machineDeployments: clusterv1.MachineDeploymentList{Items: []clusterv1.MachineDeployment{
+				*fakeMachineDeployment("md1", v1beta2Condition{
+					Type:    clusterv1.MachineDeploymentRollingOutV1Beta2Condition,
+					Status:  metav1.ConditionTrue,
+					Reason:  clusterv1.MachineDeploymentRollingOutV1Beta2Reason,
+					Message: "Rolling out 5 not up-to-date replicas",
+				}),
+			}},
+			getDescendantsSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.ClusterRollingOutV1Beta2Condition,
+				Status: metav1.ConditionTrue,
+				Reason: clusterv1.ClusterRollingOutV1Beta2Reason,
+				Message: "* MachineDeployment md1: Rolling out 5 not up-to-date replicas\n" +
+					"* MachinePool mp1: Rolling out 3 not up-to-date replicas",
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			g := NewWithT(t)
+
+			setRollingOutCondition(ctx, tt.cluster, tt.controlPlane, tt.machinePools, tt.machineDeployments, tt.controlPlaneIsNotFound, tt.getDescendantsSucceeded)
+
+			condition := v1beta2conditions.Get(tt.cluster, clusterv1.ClusterRollingOutV1Beta2Condition)
+			g.Expect(condition).ToNot(BeNil())
+			g.Expect(*condition).To(v1beta2conditions.MatchCondition(tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true)))
+		})
+	}
+}
+
 func TestSetScalingUpCondition(t *testing.T) {
 	tests := []struct {
 		name string
diff --git a/internal/controllers/machinedeployment/machinedeployment_controller.go b/internal/controllers/machinedeployment/machinedeployment_controller.go
index 8b724b923f20..062c6d1e8213 100644
--- a/internal/controllers/machinedeployment/machinedeployment_controller.go
+++ b/internal/controllers/machinedeployment/machinedeployment_controller.go
@@ -216,6 +216,7 @@ func patchMachineDeployment(ctx context.Context, patchHelper *patch.Helper, md *
 		clusterv1.MachineDeploymentAvailableV1Beta2Condition,
 		clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition,
 		clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition,
+		clusterv1.MachineDeploymentRollingOutV1Beta2Condition,
 		clusterv1.MachineDeploymentScalingDownV1Beta2Condition,
 		clusterv1.MachineDeploymentScalingUpV1Beta2Condition,
 		clusterv1.MachineDeploymentRemediatingV1Beta2Condition,
diff --git a/internal/controllers/machinedeployment/machinedeployment_status.go b/internal/controllers/machinedeployment/machinedeployment_status.go
index 8cebf21d19c1..b156545a033e 100644
--- a/internal/controllers/machinedeployment/machinedeployment_status.go
+++ b/internal/controllers/machinedeployment/machinedeployment_status.go
@@ -61,6 +61,7 @@ func (r *Reconciler) updateStatus(ctx context.Context, s *scope) (retErr error)
 	setAvailableCondition(ctx, s.machineDeployment, s.getAndAdoptMachineSetsForDeploymentSucceeded)
 
+	setRollingOutCondition(ctx, s.machineDeployment, machines, getMachinesSucceeded)
 	setScalingUpCondition(ctx, s.machineDeployment, s.machineSets, s.bootstrapTemplateNotFound, s.infrastructureTemplateNotFound, s.getAndAdoptMachineSetsForDeploymentSucceeded)
 	setScalingDownCondition(ctx, s.machineDeployment, s.machineSets, machines, s.getAndAdoptMachineSetsForDeploymentSucceeded, getMachinesSucceeded)
 
@@ -148,6 +149,74 @@ func setAvailableCondition(_ context.Context, machineDeployment *clusterv1.Machi
 	})
 }
 
+func setRollingOutCondition(_ context.Context, machineDeployment *clusterv1.MachineDeployment, machines collections.Machines, getMachinesSucceeded bool) {
+	// If we got unexpected errors in listing the machines (this should never happen), surface them.
+	if !getMachinesSucceeded {
+		v1beta2conditions.Set(machineDeployment, metav1.Condition{
+			Type:    clusterv1.MachineDeploymentRollingOutV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  clusterv1.MachineDeploymentRollingOutInternalErrorV1Beta2Reason,
+			Message: "Please check controller logs for errors",
+		})
+		return
+	}
+
+	// Count machines rolling out and collect reasons why a rollout is happening.
+	// Note: The code below collects all the reasons for which at least one machine is rolling out; under normal
+	// circumstances all the machines are rolling out for the same reasons. However, if the MD is changed again
+	// before a previous change has been fully rolled out, there could be machines rolling out for
+	// different reasons.
+	rollingOutReplicas := 0
+	rolloutReasons := sets.Set[string]{}
+	for _, machine := range machines {
+		upToDateCondition := v1beta2conditions.Get(machine, clusterv1.MachineUpToDateV1Beta2Condition)
+		if upToDateCondition == nil || upToDateCondition.Status != metav1.ConditionFalse {
+			continue
+		}
+		rollingOutReplicas++
+		if upToDateCondition.Message != "" {
+			rolloutReasons.Insert(strings.Split(upToDateCondition.Message, "; ")...)
+		}
+	}
+
+	if rollingOutReplicas == 0 {
+		var message string
+		v1beta2conditions.Set(machineDeployment, metav1.Condition{
+			Type:    clusterv1.MachineDeploymentRollingOutV1Beta2Condition,
+			Status:  metav1.ConditionFalse,
+			Reason:  clusterv1.MachineDeploymentNotRollingOutV1Beta2Reason,
+			Message: message,
+		})
+		return
+	}
+
+	// Rolling out.
+	message := fmt.Sprintf("Rolling out %d not up-to-date replicas", rollingOutReplicas)
+	if rolloutReasons.Len() > 0 {
+		// Surface rollout reasons ensuring that if there is a version change, it goes first.
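+		// Note: reasons are sorted alphabetically after the version change so the message stays deterministic across reconciles.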
+		reasons := rolloutReasons.UnsortedList()
+		sort.Slice(reasons, func(i, j int) bool {
+			if strings.HasPrefix(reasons[i], "Version") && !strings.HasPrefix(reasons[j], "Version") {
+				return true
+			}
+			if !strings.HasPrefix(reasons[i], "Version") && strings.HasPrefix(reasons[j], "Version") {
+				return false
+			}
+			return reasons[i] < reasons[j]
+		})
+		for i := range reasons {
+			reasons[i] = fmt.Sprintf("* %s", reasons[i])
+		}
+		message += fmt.Sprintf("\n%s", strings.Join(reasons, "\n"))
+	}
+	v1beta2conditions.Set(machineDeployment, metav1.Condition{
+		Type:    clusterv1.MachineDeploymentRollingOutV1Beta2Condition,
+		Status:  metav1.ConditionTrue,
+		Reason:  clusterv1.MachineDeploymentRollingOutV1Beta2Reason,
+		Message: message,
+	})
+}
+
 func setScalingUpCondition(_ context.Context, machineDeployment *clusterv1.MachineDeployment, machineSets []*clusterv1.MachineSet, bootstrapObjectNotFound, infrastructureObjectNotFound, getAndAdoptMachineSetsForDeploymentSucceeded bool) {
 	// If we got unexpected errors in listing the machine sets (this should never happen), surface them.
 	if !getAndAdoptMachineSetsForDeploymentSucceeded {
diff --git a/internal/controllers/machinedeployment/machinedeployment_status_test.go b/internal/controllers/machinedeployment/machinedeployment_status_test.go
index 8e05a6c4b3b2..78f7a4a9d8bb 100644
--- a/internal/controllers/machinedeployment/machinedeployment_status_test.go
+++ b/internal/controllers/machinedeployment/machinedeployment_status_test.go
@@ -205,6 +205,109 @@ func Test_setAvailableCondition(t *testing.T) {
 	}
 }
 
+func Test_setRollingOutCondition(t *testing.T) {
+	upToDateCondition := metav1.Condition{
+		Type:   clusterv1.MachineUpToDateV1Beta2Condition,
+		Status: metav1.ConditionTrue,
+		Reason: clusterv1.MachineUpToDateV1Beta2Reason,
+	}
+
+	tests := []struct {
+		name                 string
+		machineDeployment    *clusterv1.MachineDeployment
+		machines             []*clusterv1.Machine
+		getMachinesSucceeded bool
+		expectCondition      metav1.Condition
+	}{
+		{
+			name:                 "get machines failed",
+			machineDeployment:    &clusterv1.MachineDeployment{},
+			machines:             nil,
+			getMachinesSucceeded: false,
+			expectCondition: metav1.Condition{
+				Type:    clusterv1.MachineDeploymentRollingOutV1Beta2Condition,
+				Status:  metav1.ConditionUnknown,
+				Reason:  clusterv1.MachineDeploymentRollingOutInternalErrorV1Beta2Reason,
+				Message: "Please check controller logs for errors",
+			},
+		},
+		{
+			name:                 "no machines",
+			machineDeployment:    &clusterv1.MachineDeployment{},
+			machines:             []*clusterv1.Machine{},
+			getMachinesSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.MachineDeploymentRollingOutV1Beta2Condition,
+				Status: metav1.ConditionFalse,
+				Reason: clusterv1.MachineDeploymentNotRollingOutV1Beta2Reason,
+			},
+		},
+		{
+			name:              "all machines are up to date",
+			machineDeployment: &clusterv1.MachineDeployment{},
+			machines: []*clusterv1.Machine{
+				fakeMachine("machine-1", withV1Beta2Condition(upToDateCondition)),
+				fakeMachine("machine-2", withV1Beta2Condition(upToDateCondition)),
+			},
+			getMachinesSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.MachineDeploymentRollingOutV1Beta2Condition,
+				Status: metav1.ConditionFalse,
+				Reason: clusterv1.MachineDeploymentNotRollingOutV1Beta2Reason,
+			},
+		},
+		{
+			name:              "one up-to-date, two not up-to-date, one reporting up-to-date unknown",
+			machineDeployment: &clusterv1.MachineDeployment{},
+			machines: []*clusterv1.Machine{
+				fakeMachine("machine-1", withV1Beta2Condition(upToDateCondition)),
+				fakeMachine("machine-2", withV1Beta2Condition(metav1.Condition{
+					Type:   clusterv1.MachineUpToDateV1Beta2Condition,
+					Status: metav1.ConditionUnknown,
+					Reason: clusterv1.InternalErrorV1Beta2Reason,
+				})),
+				fakeMachine("machine-3", withV1Beta2Condition(metav1.Condition{
+					Type:    clusterv1.MachineUpToDateV1Beta2Condition,
+					Status:  metav1.ConditionFalse,
+					Reason:  clusterv1.MachineNotUpToDateV1Beta2Reason,
+					Message: "Version v1.25.0, v1.26.0 required",
+				})),
+				fakeMachine("machine-4", withV1Beta2Condition(metav1.Condition{
+					Type:    clusterv1.MachineUpToDateV1Beta2Condition,
+					Status:  metav1.ConditionFalse,
+					Reason:  clusterv1.MachineNotUpToDateV1Beta2Reason,
+					Message: "Failure domain failure-domain1, failure-domain2 required; InfrastructureMachine is not up-to-date",
+				})),
+			},
+			getMachinesSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.MachineDeploymentRollingOutV1Beta2Condition,
+				Status: metav1.ConditionTrue,
+				Reason: clusterv1.MachineDeploymentRollingOutV1Beta2Reason,
+				Message: "Rolling out 2 not up-to-date replicas\n" +
+					"* Version v1.25.0, v1.26.0 required\n" +
+					"* Failure domain failure-domain1, failure-domain2 required\n" +
+					"* InfrastructureMachine is not up-to-date",
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			g := NewWithT(t)
+
+			var machines collections.Machines
+			if tt.machines != nil {
+				machines = collections.FromMachines(tt.machines...)
+			}
+			setRollingOutCondition(ctx, tt.machineDeployment, machines, tt.getMachinesSucceeded)
+
+			condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentRollingOutV1Beta2Condition)
+			g.Expect(condition).ToNot(BeNil())
+			g.Expect(*condition).To(v1beta2conditions.MatchCondition(tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true)))
+		})
+	}
+}
+
 func Test_setScalingUpCondition(t *testing.T) {
 	defaultMachineDeployment := &clusterv1.MachineDeployment{
 		Spec: clusterv1.MachineDeploymentSpec{
diff --git a/util/conditions/v1beta2/sort.go b/util/conditions/v1beta2/sort.go
index 885dfdcadcc4..4f96fac52cbf 100644
--- a/util/conditions/v1beta2/sort.go
+++ b/util/conditions/v1beta2/sort.go
@@ -76,6 +76,7 @@ func defaultSortLessFunc(i, j metav1.Condition) bool {
 // | OwnerRemediated     |   |   |   |   |   | x |
 // | -- Operations --    |   |   |   |   |   |   |
 // | TopologyReconciled  | x |   |   |   |   |   |
+// | RollingOut          | x | x | x |   | x |   |
 // | Remediating         | x | x | x | x | x |   |
 // | ScalingDown         | x | x | x | x | x |   |
 // | ScalingUp           | x | x | x | x | x |   |
@@ -112,6 +113,7 @@ var order = []string{
 	clusterv1.MachineHealthCheckSucceededV1Beta2Condition,
 	clusterv1.MachineOwnerRemediatedV1Beta2Condition,
 	clusterv1.ClusterTopologyReconciledV1Beta2Condition,
+	clusterv1.RollingOutV1Beta2Condition,
 	clusterv1.RemediatingV1Beta2Condition,
 	clusterv1.ScalingDownV1Beta2Condition,
 	clusterv1.ScalingUpV1Beta2Condition,