diff --git a/test/e2e/clusterctl_upgrade.go b/test/e2e/clusterctl_upgrade.go index af4f8475d821..7168b71d66f2 100644 --- a/test/e2e/clusterctl_upgrade.go +++ b/test/e2e/clusterctl_upgrade.go @@ -18,6 +18,7 @@ package e2e import ( "context" + "errors" "fmt" "io" "net/http" @@ -45,6 +46,8 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/cmd/clusterctl/client/config" + "sigs.k8s.io/cluster-api/controllers/external" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/test/e2e/internal/log" "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/cluster-api/test/framework/bootstrap" @@ -428,8 +431,14 @@ func ClusterctlUpgradeSpec(ctx context.Context, inputGetter func() ClusterctlUpg Name: workloadClusterName, }, input.E2EConfig.GetIntervals(specName, "wait-cluster")...) - expectedMachineCount := *controlPlaneMachineCount + calculateExpectedWorkerCount(ctx, managementClusterProxy.GetClient(), workloadClusterUnstructured, coreCAPIStorageVersion) + By("Calculating expected MachineDeployment and MachinePool Machine and Node counts") + expectedMachineDeploymentMachineCount := calculateExpectedMachineDeploymentMachineCount(ctx, managementClusterProxy.GetClient(), workloadClusterUnstructured, coreCAPIStorageVersion) + expectedMachinePoolNodeCount := calculateExpectedMachinePoolNodeCount(ctx, managementClusterProxy.GetClient(), workloadClusterUnstructured, coreCAPIStorageVersion) + expectedMachinePoolMachineCount, err := calculateExpectedMachinePoolMachineCount(ctx, managementClusterProxy.GetClient(), workloadClusterNamespace, workloadClusterName) + Expect(err).ToNot(HaveOccurred()) + expectedMachineCount := *controlPlaneMachineCount + expectedMachineDeploymentMachineCount + expectedMachinePoolMachineCount + Byf("Expect %d Machines and %d MachinePool replicas to exist", expectedMachineCount, expectedMachinePoolNodeCount) By("Waiting for the machines to exist") Eventually(func() (int64, error) { var n int64 @@ -455,6 +464,26 @@ func ClusterctlUpgradeSpec(ctx context.Context, inputGetter func() ClusterctlUpg return n, nil }, input.E2EConfig.GetIntervals(specName, "wait-worker-nodes")...).Should(Equal(expectedMachineCount), "Timed out waiting for all Machines to exist") + By("Waiting for MachinePool to be ready with correct number of replicas") + Eventually(func() (int64, error) { + var n int64 + machinePoolList := &expv1.MachinePoolList{} + if err := managementClusterProxy.GetClient().List( + ctx, + machinePoolList, + client.InNamespace(workloadClusterNamespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: workloadClusterName}, + ); err == nil { + for _, mp := range machinePoolList.Items { + if mp.Status.Phase == string(expv1.MachinePoolPhaseRunning) { + n += int64(mp.Status.ReadyReplicas) + } + } + } + + return n, nil + }, input.E2EConfig.GetIntervals(specName, "wait-worker-nodes")...).Should(Equal(expectedMachinePoolNodeCount), "Timed out waiting for all MachinePool replicas to be ready") + By("THE MANAGEMENT CLUSTER WITH OLDER VERSION OF PROVIDERS WORKS!") for i, upgrade := range input.Upgrades { @@ -609,20 +638,24 @@ func ClusterctlUpgradeSpec(ctx context.Context, inputGetter func() ClusterctlUpg ClusterName: workloadClusterName, Namespace: workloadClusterNamespace, }) - framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{ - ClusterProxy: managementClusterProxy, - Cluster: workloadCluster, - MachineDeployment: testMachineDeployments[0], - Replicas: 2, - WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), - }) - framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{ - ClusterProxy: managementClusterProxy, - Cluster: workloadCluster, - MachineDeployment: testMachineDeployments[0], - Replicas: 1, - WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), - }) + if len(testMachineDeployments) > 0 { + framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{ + ClusterProxy: managementClusterProxy, + Cluster: workloadCluster, + MachineDeployment: testMachineDeployments[0], + Replicas: 2, + WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), + }) + framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{ + ClusterProxy: managementClusterProxy, + Cluster: workloadCluster, + MachineDeployment: testMachineDeployments[0], + Replicas: 1, + WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), + }) + } else { + Byf("[%d] No MachineDeployments found to scale", i) + } } Byf("[%d] Verify client-side SSA still works", i) @@ -763,8 +796,8 @@ func discoveryAndWaitForCluster(ctx context.Context, input discoveryAndWaitForCl return cluster } -func calculateExpectedWorkerCount(ctx context.Context, c client.Client, unstructuredCluster *unstructured.Unstructured, coreCAPIStorageVersion string) int64 { - var expectedWorkerCount int64 +func calculateExpectedMachineDeploymentMachineCount(ctx context.Context, c client.Client, unstructuredCluster *unstructured.Unstructured, coreCAPIStorageVersion string) int64 { + var expectedMachineDeploymentWorkerCount int64 // Convert v1beta1 unstructured Cluster to clusterv1.Cluster // Only v1beta1 Cluster support ClusterClass (i.e. have cluster.spec.topology). @@ -778,16 +811,10 @@ func calculateExpectedWorkerCount(ctx context.Context, c client.Client, unstruct if md.Replicas == nil { continue } - expectedWorkerCount += int64(*md.Replicas) - } - for _, mp := range cluster.Spec.Topology.Workers.MachinePools { - if mp.Replicas == nil { - continue - } - expectedWorkerCount += int64(*mp.Replicas) + expectedMachineDeploymentWorkerCount += int64(*md.Replicas) } } - return expectedWorkerCount + return expectedMachineDeploymentWorkerCount } } @@ -811,7 +838,67 @@ func calculateExpectedWorkerCount(ctx context.Context, c client.Client, unstruct if !ok { continue } - expectedWorkerCount += replicas + expectedMachineDeploymentWorkerCount += replicas + } + + return expectedMachineDeploymentWorkerCount +} + +func calculateExpectedMachinePoolMachineCount(ctx context.Context, c client.Client, workloadClusterNamespace, workloadClusterName string) (int64, error) { + expectedMachinePoolMachineCount := int64(0) + + machinePoolList := &expv1.MachinePoolList{} + if err := c.List( + ctx, + machinePoolList, + client.InNamespace(workloadClusterNamespace), + client.MatchingLabels{clusterv1.ClusterNameLabel: workloadClusterName}, + ); err == nil { + for _, mp := range machinePoolList.Items { + mp := mp + infraMachinePool, err := external.Get(ctx, c, &mp.Spec.Template.Spec.InfrastructureRef, workloadClusterNamespace) + if err != nil { + return 0, err + } + // Check if the InfraMachinePool has an infrastructureMachineKind field. If it does not, we should skip checking for MachinePool machines. + err = util.UnstructuredUnmarshalField(infraMachinePool, ptr.To(""), "status", "infrastructureMachineKind") + if err != nil && !errors.Is(err, util.ErrUnstructuredFieldNotFound) { + return 0, err + } + if err == nil { + expectedMachinePoolMachineCount += int64(*mp.Spec.Replicas) + } + } + } + + return expectedMachinePoolMachineCount, nil +} + +func calculateExpectedMachinePoolNodeCount(ctx context.Context, c client.Client, unstructuredCluster *unstructured.Unstructured, coreCAPIStorageVersion string) int64 { + var expectedMachinePoolWorkerCount int64 + + // Convert v1beta1 unstructured Cluster to clusterv1.Cluster + // Only v1beta1 Cluster support ClusterClass (i.e. have cluster.spec.topology). + if unstructuredCluster.GroupVersionKind().Version == clusterv1.GroupVersion.Version { + cluster := &clusterv1.Cluster{} + Expect(apiruntime.DefaultUnstructuredConverter.FromUnstructured(unstructuredCluster.Object, cluster)).To(Succeed()) + + if cluster.Spec.Topology != nil { + if cluster.Spec.Topology.Workers != nil { + for _, mp := range cluster.Spec.Topology.Workers.MachinePools { + if mp.Replicas == nil { + continue + } + expectedMachinePoolWorkerCount += int64(*mp.Replicas) + } + } + return expectedMachinePoolWorkerCount + } + } + + byClusterOptions := []client.ListOption{ + client.InNamespace(unstructuredCluster.GetNamespace()), + client.MatchingLabels{clusterv1.ClusterNameLabel: unstructuredCluster.GetName()}, } machinePoolList := &unstructured.UnstructuredList{} @@ -827,16 +914,16 @@ func calculateExpectedWorkerCount(ctx context.Context, c client.Client, unstruct Eventually(func() error { return c.List(ctx, machinePoolList, byClusterOptions...) }, 3*time.Minute, 3*time.Second).Should(Succeed(), "Failed to list MachinePool object for Cluster %s", klog.KObj(unstructuredCluster)) - for _, md := range machinePoolList.Items { - replicas, ok, err := unstructured.NestedInt64(md.Object, "spec", "replicas") + for _, mp := range machinePoolList.Items { + replicas, ok, err := unstructured.NestedInt64(mp.Object, "spec", "replicas") Expect(err).ToNot(HaveOccurred()) if !ok { continue } - expectedWorkerCount += replicas + expectedMachinePoolWorkerCount += replicas } - return expectedWorkerCount + return expectedMachinePoolWorkerCount } // deleteAllClustersAndWaitInput is the input type for deleteAllClustersAndWait.