From e9225c1c57c3b35b47db4937f02f1954e132803f Mon Sep 17 00:00:00 2001 From: Jont828 Date: Mon, 15 May 2023 18:15:16 -0400 Subject: [PATCH] Implement MachinePool Machines in CAPI, CAPD, and clusterctl --- api/v1beta1/common_types.go | 5 + api/v1beta1/machine_types.go | 4 + api/v1beta1/machine_webhook.go | 117 ++++- api/v1beta1/machine_webhook_test.go | 113 +++- api/v1beta1/zz_generated.openapi.go | 12 + cmd/clusterctl/client/tree/discovery.go | 54 +- config/manager/manager.yaml | 4 + config/webhook/manifests.yaml | 1 + .../src/reference/labels_and_annotations.md | 98 ++-- .../controllers/machinepool_controller.go | 19 + .../machinepool_controller_phases.go | 306 +++++++++++ .../machinepool_controller_phases_test.go | 483 ++++++++++++++++++ .../controllers/machine/machine_controller.go | 17 + .../machine/machine_controller_phases.go | 18 +- .../docker/api/v1alpha3/conversion.go | 26 +- .../api/v1alpha3/zz_generated.conversion.go | 16 +- .../docker/api/v1alpha4/conversion.go | 26 +- .../api/v1alpha4/zz_generated.conversion.go | 16 +- .../docker/api/v1beta1/dockermachine_types.go | 7 + ...e.cluster.x-k8s.io_dockermachinepools.yaml | 50 ++ ...cture.cluster.x-k8s.io_dockermachines.yaml | 7 + ...uster.x-k8s.io_dockermachinetemplates.yaml | 8 + .../docker/exp/api/v1alpha3/conversion.go | 29 +- .../api/v1alpha3/zz_generated.conversion.go | 9 +- .../docker/exp/api/v1alpha4/conversion.go | 29 +- .../api/v1alpha4/zz_generated.conversion.go | 9 +- .../api/v1beta1/dockermachinepool_types.go | 20 +- .../api/v1beta1/dockermachinepool_webhook.go | 4 +- .../exp/api/v1beta1/zz_generated.deepcopy.go | 1 + .../dockermachinepool_controller.go | 208 +++++++- .../docker/exp/internal/docker/nodepool.go | 85 ++- .../controllers/dockermachine_controller.go | 150 +++--- util/labels/helpers.go | 11 + util/labels/helpers_test.go | 45 ++ 34 files changed, 1747 insertions(+), 260 deletions(-) diff --git a/api/v1beta1/common_types.go b/api/v1beta1/common_types.go index 56c6807f825f..97284bd74df6 100644 --- a/api/v1beta1/common_types.go +++ b/api/v1beta1/common_types.go @@ -61,6 +61,11 @@ const ( // update that disallows a pre-existing Cluster to be populated with Topology information and Class. ClusterTopologyUnsafeUpdateClassNameAnnotation = "unsafe.topology.cluster.x-k8s.io/disable-update-class-name-check" + // FallbackMachineLabel indicates that a Machine belongs to a MachinePool that does not support MachinePool Machines. + // As such, these Machines exist to create a consistent user experience and will not have an infrastructure reference. The user will + // also be prevented from deleting these Machines. + FallbackMachineLabel = "machinepool.cluster.x-k8s.io/fallback-machine" + // ProviderNameLabel is the label set on components in the provider manifest. // This label allows to easily identify all the components belonging to a provider; the clusterctl // tool uses this label for implementing provider's lifecycle operations. diff --git a/api/v1beta1/machine_types.go b/api/v1beta1/machine_types.go index 21bfa548f652..ee9597306cec 100644 --- a/api/v1beta1/machine_types.go +++ b/api/v1beta1/machine_types.go @@ -43,6 +43,10 @@ const ( // MachineDeploymentNameLabel is the label set on machines if they're controlled by MachineDeployment. MachineDeploymentNameLabel = "cluster.x-k8s.io/deployment-name" + // MachinePoolNameLabel is the label indicating the name of the MachinePool a Machine is controlled by. + // Note: The value of this label may be a hash if the MachinePool name is longer than 63 characters. 
+ MachinePoolNameLabel = "cluster.x-k8s.io/pool-name" + // MachineControlPlaneNameLabel is the label set on machines if they're controlled by a ControlPlane. // Note: The value of this label may be a hash if the control plane name is longer than 63 characters. MachineControlPlaneNameLabel = "cluster.x-k8s.io/control-plane-name" diff --git a/api/v1beta1/machine_webhook.go b/api/v1beta1/machine_webhook.go index 4a21872eaa51..a0425b88897c 100644 --- a/api/v1beta1/machine_webhook.go +++ b/api/v1beta1/machine_webhook.go @@ -17,7 +17,9 @@ limitations under the License. package v1beta1 import ( + "context" "fmt" + "os" "strings" "time" @@ -27,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/util/validation/field" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/webhook" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" "sigs.k8s.io/cluster-api/util/version" ) @@ -35,14 +38,15 @@ const defaultNodeDeletionTimeout = 10 * time.Second func (m *Machine) SetupWebhookWithManager(mgr ctrl.Manager) error { return ctrl.NewWebhookManagedBy(mgr). - For(m). + For(&Machine{}). + WithValidator(MachineValidator(mgr.GetScheme())). Complete() } -// +kubebuilder:webhook:verbs=create;update,path=/validate-cluster-x-k8s-io-v1beta1-machine,mutating=false,failurePolicy=fail,matchPolicy=Equivalent,groups=cluster.x-k8s.io,resources=machines,versions=v1beta1,name=validation.machine.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1 +// +kubebuilder:webhook:verbs=create;update;delete,path=/validate-cluster-x-k8s-io-v1beta1-machine,mutating=false,failurePolicy=fail,matchPolicy=Equivalent,groups=cluster.x-k8s.io,resources=machines,versions=v1beta1,name=validation.machine.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1 // +kubebuilder:webhook:verbs=create;update,path=/mutate-cluster-x-k8s-io-v1beta1-machine,mutating=true,failurePolicy=fail,matchPolicy=Equivalent,groups=cluster.x-k8s.io,resources=machines,versions=v1beta1,name=default.machine.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1 -var _ webhook.Validator = &Machine{} +var _ webhook.CustomValidator = &machineValidator{} var _ webhook.Defaulter = &Machine{} // Default implements webhook.Defaulter so a webhook will be registered for the type. @@ -57,7 +61,10 @@ func (m *Machine) Default() { } if m.Spec.InfrastructureRef.Namespace == "" { - m.Spec.InfrastructureRef.Namespace = m.Namespace + // Don't autofill namespace for MachinePool Machines since the infraRef will be populated by the MachinePool controller. + if !isMachinePoolMachine(m) { + m.Spec.InfrastructureRef.Namespace = m.Namespace + } } if m.Spec.Version != nil && !strings.HasPrefix(*m.Spec.Version, "v") { @@ -70,22 +77,60 @@ func (m *Machine) Default() { } } +// MachineValidator creates a new CustomValidator for Machines. +func MachineValidator(_ *runtime.Scheme) webhook.CustomValidator { + return &machineValidator{} +} + +// machineValidator implements a defaulting webhook for Machine. +type machineValidator struct{} + // ValidateCreate implements webhook.Validator so a webhook will be registered for the type. -func (m *Machine) ValidateCreate() error { +func (*machineValidator) ValidateCreate(_ context.Context, obj runtime.Object) error { + m, ok := obj.(*Machine) + if !ok { + return apierrors.NewBadRequest(fmt.Sprintf("expected a Machine but got a %T", obj)) + } + return m.validate(nil) } // ValidateUpdate implements webhook.Validator so a webhook will be registered for the type. 
-func (m *Machine) ValidateUpdate(old runtime.Object) error { - oldM, ok := old.(*Machine) +func (*machineValidator) ValidateUpdate(_ context.Context, oldObj runtime.Object, newObj runtime.Object) error { + newM, ok := newObj.(*Machine) + if !ok { + return apierrors.NewBadRequest(fmt.Sprintf("expected a Machine but got a %T", newObj)) + } + + oldM, ok := oldObj.(*Machine) if !ok { - return apierrors.NewBadRequest(fmt.Sprintf("expected a Machine but got a %T", old)) + return apierrors.NewBadRequest(fmt.Sprintf("expected a Machine but got a %T", oldObj)) } - return m.validate(oldM) + return newM.validate(oldM) } // ValidateDelete implements webhook.Validator so a webhook will be registered for the type. -func (m *Machine) ValidateDelete() error { +func (*machineValidator) ValidateDelete(ctx context.Context, obj runtime.Object) error { + m, ok := obj.(*Machine) + if !ok { + return apierrors.NewBadRequest(fmt.Sprintf("expected a Machine but got a %T", obj)) + } + + req, err := admission.RequestFromContext(ctx) + if err != nil { + return apierrors.NewBadRequest(fmt.Sprintf("expected a admission.Request inside context: %v", err)) + } + + // Fallback machines are placeholders for InfraMachinePools that do not support MachinePool Machines. These have + // no bootstrap or infrastructure data and cannot be deleted by users. They instead exist to provide a consistent + // user experience for MachinePool Machines. + if _, isFallbackMachine := m.Labels[FallbackMachineLabel]; isFallbackMachine { + // Only allow the request if it is coming from the CAPI controller service account. + if req.UserInfo.Username != "system:serviceaccount:"+os.Getenv("POD_NAMESPACE")+":"+os.Getenv("POD_SERVICE_ACCOUNT") { + return apierrors.NewBadRequest("this Machine is a placeholder for InfraMachinePools that do not support MachinePool Machines and cannot be deleted by users, scale down the MachinePool instead to delete") + } + } + return nil } @@ -93,13 +138,16 @@ func (m *Machine) validate(old *Machine) error { var allErrs field.ErrorList specPath := field.NewPath("spec") if m.Spec.Bootstrap.ConfigRef == nil && m.Spec.Bootstrap.DataSecretName == nil { - allErrs = append( - allErrs, - field.Required( - specPath.Child("bootstrap", "data"), - "expected either spec.bootstrap.dataSecretName or spec.bootstrap.configRef to be populated", - ), - ) + // MachinePool Machines don't have a bootstrap configRef, so don't require it. The bootstrap config is instead owned by the MachinePool. + if !isMachinePoolMachine(m) { + allErrs = append( + allErrs, + field.Required( + specPath.Child("bootstrap", "data"), + "expected either spec.bootstrap.dataSecretName or spec.bootstrap.configRef to be populated", + ), + ) + } } if m.Spec.Bootstrap.ConfigRef != nil && m.Spec.Bootstrap.ConfigRef.Namespace != m.Namespace { @@ -113,15 +161,18 @@ func (m *Machine) validate(old *Machine) error { ) } - if m.Spec.InfrastructureRef.Namespace != m.Namespace { - allErrs = append( - allErrs, - field.Invalid( - specPath.Child("infrastructureRef", "namespace"), - m.Spec.InfrastructureRef.Namespace, - "must match metadata.namespace", - ), - ) + // InfraRef can be empty for MachinePool Machines so skip the check in that case. 
+ if !isMachinePoolMachine(m) { + if m.Spec.InfrastructureRef.Namespace != m.Namespace { + allErrs = append( + allErrs, + field.Invalid( + specPath.Child("infrastructureRef", "namespace"), + m.Spec.InfrastructureRef.Namespace, + "must match metadata.namespace", + ), + ) + } } if old != nil && old.Spec.ClusterName != m.Spec.ClusterName { @@ -142,3 +193,17 @@ func (m *Machine) validate(old *Machine) error { } return apierrors.NewInvalid(GroupVersion.WithKind("Machine").GroupKind(), m.Name, allErrs) } + +func isMachinePoolMachine(m *Machine) bool { + if m.OwnerReferences == nil { + return false + } + + for _, owner := range m.OwnerReferences { + if owner.Kind == "MachinePool" { + return true + } + } + + return false +} diff --git a/api/v1beta1/machine_webhook_test.go b/api/v1beta1/machine_webhook_test.go index 485330aa3355..0d94dfe75dbf 100644 --- a/api/v1beta1/machine_webhook_test.go +++ b/api/v1beta1/machine_webhook_test.go @@ -17,14 +17,15 @@ limitations under the License. package v1beta1 import ( + "context" "testing" . "github.com/onsi/gomega" + admissionv1 "k8s.io/api/admission/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/pointer" - - utildefaulting "sigs.k8s.io/cluster-api/util/defaulting" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" ) func TestMachineDefault(t *testing.T) { @@ -39,7 +40,10 @@ func TestMachineDefault(t *testing.T) { Version: pointer.String("1.17.5"), }, } - t.Run("for Machine", utildefaulting.DefaultValidateTest(m)) + scheme, err := SchemeBuilder.Build() + g.Expect(err).ToNot(HaveOccurred()) + validator := MachineValidator(scheme) + t.Run("for Machine", defaultDefaulterTestCustomValidator(m, validator)) m.Default() g.Expect(m.Labels[ClusterNameLabel]).To(Equal(m.Spec.ClusterName)) @@ -75,15 +79,25 @@ func TestMachineBootstrapValidation(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) + scheme, err := SchemeBuilder.Build() + g.Expect(err).ToNot(HaveOccurred()) + validator := MachineValidator(scheme) + + ctx := admission.NewContextWithRequest(context.Background(), admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Operation: admissionv1.Create, + }, + }) + m := &Machine{ Spec: MachineSpec{Bootstrap: tt.bootstrap}, } if tt.expectErr { - g.Expect(m.ValidateCreate()).NotTo(Succeed()) - g.Expect(m.ValidateUpdate(m)).NotTo(Succeed()) + g.Expect(validator.ValidateCreate(ctx, m)).NotTo(Succeed()) + g.Expect(validator.ValidateUpdate(ctx, m, m)).NotTo(Succeed()) } else { - g.Expect(m.ValidateCreate()).To(Succeed()) - g.Expect(m.ValidateUpdate(m)).To(Succeed()) + g.Expect(validator.ValidateCreate(ctx, m)).To(Succeed()) + g.Expect(validator.ValidateUpdate(ctx, m, m)).To(Succeed()) } }) } @@ -130,6 +144,15 @@ func TestMachineNamespaceValidation(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) + scheme, err := SchemeBuilder.Build() + g.Expect(err).ToNot(HaveOccurred()) + validator := MachineValidator(scheme) + + ctx := admission.NewContextWithRequest(context.Background(), admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Operation: admissionv1.Create, + }, + }) m := &Machine{ ObjectMeta: metav1.ObjectMeta{Namespace: tt.namespace}, @@ -137,11 +160,11 @@ func TestMachineNamespaceValidation(t *testing.T) { } if tt.expectErr { - g.Expect(m.ValidateCreate()).NotTo(Succeed()) - g.Expect(m.ValidateUpdate(m)).NotTo(Succeed()) + g.Expect(validator.ValidateCreate(ctx, m)).NotTo(Succeed()) + 
g.Expect(validator.ValidateUpdate(ctx, m, m)).NotTo(Succeed()) } else { - g.Expect(m.ValidateCreate()).To(Succeed()) - g.Expect(m.ValidateUpdate(m)).To(Succeed()) + g.Expect(validator.ValidateCreate(ctx, m)).To(Succeed()) + g.Expect(validator.ValidateUpdate(ctx, m, m)).To(Succeed()) } }) } @@ -171,6 +194,15 @@ func TestMachineClusterNameImmutable(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) + scheme, err := SchemeBuilder.Build() + g.Expect(err).ToNot(HaveOccurred()) + validator := MachineValidator(scheme) + + ctx := admission.NewContextWithRequest(context.Background(), admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Operation: admissionv1.Create, + }, + }) newMachine := &Machine{ Spec: MachineSpec{ @@ -186,9 +218,9 @@ func TestMachineClusterNameImmutable(t *testing.T) { } if tt.expectErr { - g.Expect(newMachine.ValidateUpdate(oldMachine)).NotTo(Succeed()) + g.Expect(validator.ValidateUpdate(ctx, oldMachine, newMachine)).NotTo(Succeed()) } else { - g.Expect(newMachine.ValidateUpdate(oldMachine)).To(Succeed()) + g.Expect(validator.ValidateUpdate(ctx, oldMachine, newMachine)).To(Succeed()) } }) } @@ -230,6 +262,15 @@ func TestMachineVersionValidation(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) + scheme, err := SchemeBuilder.Build() + g.Expect(err).ToNot(HaveOccurred()) + validator := MachineValidator(scheme) + + ctx := admission.NewContextWithRequest(context.Background(), admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Operation: admissionv1.Create, + }, + }) m := &Machine{ Spec: MachineSpec{ @@ -239,12 +280,50 @@ func TestMachineVersionValidation(t *testing.T) { } if tt.expectErr { - g.Expect(m.ValidateCreate()).NotTo(Succeed()) - g.Expect(m.ValidateUpdate(m)).NotTo(Succeed()) + g.Expect(validator.ValidateCreate(ctx, m)).NotTo(Succeed()) + g.Expect(validator.ValidateUpdate(ctx, m, m)).NotTo(Succeed()) } else { - g.Expect(m.ValidateCreate()).To(Succeed()) - g.Expect(m.ValidateUpdate(m)).To(Succeed()) + g.Expect(validator.ValidateCreate(ctx, m)).To(Succeed()) + g.Expect(validator.ValidateUpdate(ctx, m, m)).To(Succeed()) } }) } } + +// defaultDefaulterTestCustomVAlidator returns a new testing function to be used in tests to +// make sure defaulting webhooks also pass validation tests on create, update and delete. +// Note: The difference to util/defaulting.DefaultValidateTest is that this function takes an additional +// CustomValidator as the validation is not implemented on the object directly. 
+func defaultDefaulterTestCustomValidator(object admission.Defaulter, customValidator admission.CustomValidator) func(*testing.T) { + return func(t *testing.T) { + t.Helper() + + createCopy := object.DeepCopyObject().(admission.Defaulter) + updateCopy := object.DeepCopyObject().(admission.Defaulter) + deleteCopy := object.DeepCopyObject().(admission.Defaulter) + defaultingUpdateCopy := updateCopy.DeepCopyObject().(admission.Defaulter) + + ctx := admission.NewContextWithRequest(context.Background(), admission.Request{ + AdmissionRequest: admissionv1.AdmissionRequest{ + Operation: admissionv1.Create, + }, + }) + + t.Run("validate-on-create", func(t *testing.T) { + g := NewWithT(t) + createCopy.Default() + g.Expect(customValidator.ValidateCreate(ctx, createCopy)).To(Succeed()) + }) + t.Run("validate-on-update", func(t *testing.T) { + g := NewWithT(t) + defaultingUpdateCopy.Default() + updateCopy.Default() + g.Expect(customValidator.ValidateUpdate(ctx, defaultingUpdateCopy, updateCopy)).To(Succeed()) + }) + t.Run("validate-on-delete", func(t *testing.T) { + g := NewWithT(t) + deleteCopy.Default() + g.Expect(customValidator.ValidateDelete(ctx, deleteCopy)).To(Succeed()) + }) + } +} diff --git a/api/v1beta1/zz_generated.openapi.go b/api/v1beta1/zz_generated.openapi.go index 45a5e207e84a..b6eb18c43289 100644 --- a/api/v1beta1/zz_generated.openapi.go +++ b/api/v1beta1/zz_generated.openapi.go @@ -93,6 +93,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "sigs.k8s.io/cluster-api/api/v1beta1.WorkersClass": schema_sigsk8sio_cluster_api_api_v1beta1_WorkersClass(ref), "sigs.k8s.io/cluster-api/api/v1beta1.WorkersTopology": schema_sigsk8sio_cluster_api_api_v1beta1_WorkersTopology(ref), "sigs.k8s.io/cluster-api/api/v1beta1.machineDeploymentDefaulter": schema_sigsk8sio_cluster_api_api_v1beta1_machineDeploymentDefaulter(ref), + "sigs.k8s.io/cluster-api/api/v1beta1.machineValidator": schema_sigsk8sio_cluster_api_api_v1beta1_machineValidator(ref), } } @@ -3324,3 +3325,14 @@ func schema_sigsk8sio_cluster_api_api_v1beta1_machineDeploymentDefaulter(ref com "sigs.k8s.io/controller-runtime/pkg/webhook/admission.Decoder"}, } } + +func schema_sigsk8sio_cluster_api_api_v1beta1_machineValidator(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "machineValidator implements a defaulting webhook for Machine.", + Type: []string{"object"}, + }, + }, + } +} diff --git a/cmd/clusterctl/client/tree/discovery.go b/cmd/clusterctl/client/tree/discovery.go index e8ea53cdc462..b118bc1b395b 100644 --- a/cmd/clusterctl/client/tree/discovery.go +++ b/cmd/clusterctl/client/tree/discovery.go @@ -111,12 +111,16 @@ func Discovery(ctx context.Context, c client.Client, namespace, name string, opt machineMap[m.Name] = true if visible { - if machineInfra, err := external.Get(ctx, c, &m.Spec.InfrastructureRef, cluster.Namespace); err == nil { - tree.Add(m, machineInfra, ObjectMetaName("MachineInfrastructure"), NoEcho(true)) + if (m.Spec.InfrastructureRef != corev1.ObjectReference{}) { + if machineInfra, err := external.Get(ctx, c, &m.Spec.InfrastructureRef, cluster.Namespace); err == nil { + tree.Add(m, machineInfra, ObjectMetaName("MachineInfrastructure"), NoEcho(true)) + } } - if machineBootstrap, err := external.Get(ctx, c, m.Spec.Bootstrap.ConfigRef, cluster.Namespace); err == nil { - tree.Add(m, machineBootstrap, ObjectMetaName("BootstrapConfig"), NoEcho(true)) + if 
m.Spec.Bootstrap.ConfigRef != nil { + if machineBootstrap, err := external.Get(ctx, c, m.Spec.Bootstrap.ConfigRef, cluster.Namespace); err == nil { + tree.Add(m, machineBootstrap, ObjectMetaName("BootstrapConfig"), NoEcho(true)) + } } } } @@ -146,25 +150,25 @@ func Discovery(ctx context.Context, c client.Client, namespace, name string, opt if err != nil { return nil, err } - - // Handles orphan machines. - if len(machineMap) < len(machinesList.Items) { - other := VirtualObject(cluster.Namespace, "OtherGroup", "Other") - tree.Add(workers, other) - - for i := range machinesList.Items { - m := &machinesList.Items[i] - if _, ok := machineMap[m.Name]; ok { - continue - } - addMachineFunc(other, m) - } - } } if len(machinePoolList.Items) > 0 { // Add MachinePool objects tree.Add(cluster, workers) - addMachinePoolsToObjectTree(ctx, c, cluster.Namespace, workers, machinePoolList, tree) + addMachinePoolsToObjectTree(ctx, c, cluster.Namespace, workers, machinePoolList, machinesList, tree, addMachineFunc) + } + + // Handles orphan machines. + if len(machineMap) < len(machinesList.Items) { + other := VirtualObject(cluster.Namespace, "OtherGroup", "Other") + tree.Add(workers, other) + + for i := range machinesList.Items { + m := &machinesList.Items[i] + if _, ok := machineMap[m.Name]; ok { + continue + } + addMachineFunc(other, m) + } } return tree, nil @@ -268,10 +272,10 @@ func addMachineDeploymentToObjectTree(ctx context.Context, c client.Client, clus return nil } -func addMachinePoolsToObjectTree(ctx context.Context, c client.Client, namespace string, workers *unstructured.Unstructured, machinePoolList *expv1.MachinePoolList, tree *ObjectTree) { +func addMachinePoolsToObjectTree(ctx context.Context, c client.Client, namespace string, workers *unstructured.Unstructured, machinePoolList *expv1.MachinePoolList, machinesList *clusterv1.MachineList, tree *ObjectTree, addMachineFunc func(parent client.Object, m *clusterv1.Machine)) { for i := range machinePoolList.Items { mp := &machinePoolList.Items[i] - _, visible := tree.Add(workers, mp) + _, visible := tree.Add(workers, mp, GroupingObject(true)) if visible { if machinePoolBootstrap, err := external.Get(ctx, c, mp.Spec.Template.Spec.Bootstrap.ConfigRef, namespace); err == nil { @@ -279,9 +283,15 @@ func addMachinePoolsToObjectTree(ctx context.Context, c client.Client, namespace } if machinePoolInfra, err := external.Get(ctx, c, &mp.Spec.Template.Spec.InfrastructureRef, namespace); err == nil { - tree.Add(mp, machinePoolInfra, ObjectMetaName("MachineInfrastructure"), NoEcho(true)) + tree.Add(mp, machinePoolInfra, ObjectMetaName("MachinePoolInfrastructure"), NoEcho(true)) } } + + machines := selectMachinesControlledBy(machinesList, mp) + + for _, m := range machines { + addMachineFunc(mp, m) + } } } diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 4d1b70b16622..f87ceaee818f 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -38,6 +38,10 @@ spec: valueFrom: fieldRef: fieldPath: metadata.uid + - name: POD_SERVICE_ACCOUNT + valueFrom: + fieldRef: + fieldPath: spec.serviceAccountName ports: - containerPort: 9440 name: healthz diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index 1054578ea4f7..c0d8363580eb 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -227,6 +227,7 @@ webhooks: operations: - CREATE - UPDATE + - DELETE resources: - machines sideEffects: None diff --git a/docs/book/src/reference/labels_and_annotations.md 
b/docs/book/src/reference/labels_and_annotations.md index 2b34e038a73c..b3ef165c5601 100644 --- a/docs/book/src/reference/labels_and_annotations.md +++ b/docs/book/src/reference/labels_and_annotations.md @@ -1,56 +1,58 @@ **Supported Labels:** -| Label | Note | -|:------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| cluster.x-k8s.io/cluster-name | It is set on machines linked to a cluster and external objects(bootstrap and infrastructure providers). | -| topology.cluster.x-k8s.io/owned | It is set on all the object which are managed as part of a ClusterTopology. | -| topology.cluster.x-k8s.io/deployment-name | It is set on the generated MachineDeployment objects to track the name of the MachineDeployment topology it represents. | -| cluster.x-k8s.io/provider | It is set on components in the provider manifest. The label allows one to easily identify all the components belonging to a provider. The clusterctl tool uses this label for implementing provider's lifecycle operations. | -| cluster.x-k8s.io/watch-filter | It can be applied to any Cluster API object. Controllers which allow for selective reconciliation may check this label and proceed with reconciliation of the object only if this label and a configured value is present. | -| cluster.x-k8s.io/interruptible | It is used to mark the nodes that run on interruptible instances. | -| cluster.x-k8s.io/control-plane | It is set on machines or related objects that are part of a control plane. | -| cluster.x-k8s.io/set-name | It is set on machines if they're controlled by MachineSet. The value of this label may be a hash if the MachineSet name is longer than 63 characters. | -| cluster.x-k8s.io/control-plane-name | It is set on machines if they're controlled by a control plane. The value of this label may be a hash if the control plane name is longer than 63 characters. | -| cluster.x-k8s.io/deployment-name | It is set on machines if they're controlled by a MachineDeployment. | -| machine-template-hash | It is applied to Machines in a MachineDeployment containing the hash of the template. | +| Label | Note | +| :-------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| cluster.x-k8s.io/cluster-name | It is set on machines linked to a cluster and external objects(bootstrap and infrastructure providers). | +| topology.cluster.x-k8s.io/owned | It is set on all the object which are managed as part of a ClusterTopology. | +| topology.cluster.x-k8s.io/deployment-name | It is set on the generated MachineDeployment objects to track the name of the MachineDeployment topology it represents. | +| cluster.x-k8s.io/provider | It is set on components in the provider manifest. The label allows one to easily identify all the components belonging to a provider. The clusterctl tool uses this label for implementing provider's lifecycle operations. | +| cluster.x-k8s.io/watch-filter | It can be applied to any Cluster API object. 
Controllers which allow for selective reconciliation may check this label and proceed with reconciliation of the object only if this label and a configured value is present. |
+| cluster.x-k8s.io/interruptible                 | It is used to mark the nodes that run on interruptible instances. |
+| cluster.x-k8s.io/control-plane                 | It is set on machines or related objects that are part of a control plane. |
+| cluster.x-k8s.io/set-name                      | It is set on machines if they're controlled by MachineSet. The value of this label may be a hash if the MachineSet name is longer than 63 characters. |
+| cluster.x-k8s.io/control-plane-name            | It is set on machines if they're controlled by a control plane. The value of this label may be a hash if the control plane name is longer than 63 characters. |
+| cluster.x-k8s.io/deployment-name               | It is set on machines if they're controlled by a MachineDeployment. |
+| cluster.x-k8s.io/pool-name                     | It is set on machines if they're controlled by a MachinePool. |
+| machinepool.cluster.x-k8s.io/fallback-machine  | It is set on Machines belonging to a MachinePool that does not support MachinePool Machines. These Machines exist to create a consistent user experience and will not have an infrastructure reference. The user will also be prevented from deleting these Machines. See the example below. |
+| machine-template-hash                          | It is applied to Machines in a MachineDeployment containing the hash of the template. |
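+
+As an illustrative sketch (the cluster and pool names below are placeholders), a Machine created on behalf of a MachinePool carries the pool-name label, and additionally the fallback label when the InfraMachinePool does not support MachinePool Machines:
+
+```yaml
+apiVersion: cluster.x-k8s.io/v1beta1
+kind: Machine
+metadata:
+  labels:
+    cluster.x-k8s.io/cluster-name: my-cluster
+    cluster.x-k8s.io/pool-name: my-pool
+    # Present only for fallback Machines created for InfraMachinePools
+    # that do not support MachinePool Machines.
+    machinepool.cluster.x-k8s.io/fallback-machine: "true"
+```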
**Supported Annotations:** -| Annotation | Note | -|:-----------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| clusterctl.cluster.x-k8s.io/skip-crd-name-preflight-check | Can be placed on provider CRDs, so that clusterctl doesn't emit an error if the CRD doesn't comply with Cluster APIs naming scheme. Only CRDs that are referenced by core Cluster API CRDs have to comply with the naming scheme. | -| clusterctl.cluster.x-k8s.io/delete-for-move | DeleteForMoveAnnotation will be set to objects that are going to be deleted from the source cluster after being moved to the target cluster during the clusterctl move operation. It will help any validation webhook to take decision based on it. | -| unsafe.topology.cluster.x-k8s.io/disable-update-class-name-check | It can be used to disable the webhook check on update that disallows a pre-existing Cluster to be populated with Topology information and Class. | -| cluster.x-k8s.io/cluster-name | It is set on nodes identifying the name of the cluster the node belongs to. | -| cluster.x-k8s.io/cluster-namespace | It is set on nodes identifying the namespace of the cluster the node belongs to. | -| cluster.x-k8s.io/machine | It is set on nodes identifying the machine the node belongs to. | -| cluster.x-k8s.io/owner-kind | It is set on nodes identifying the owner kind. | -| cluster.x-k8s.io/owner-name | It is set on nodes identifying the owner name. | -| cluster.x-k8s.io/paused | It can be applied to any Cluster API object to prevent a controller from processing a resource. Controllers working with Cluster API objects must check the existence of this annotation on the reconciled object. | -| cluster.x-k8s.io/disable-machine-create | It can be used to signal a MachineSet to stop creating new machines. It is utilized in the OnDelete MachineDeploymentStrategy to allow the MachineDeployment controller to scale down older MachineSets when Machines are deleted and add the new replicas to the latest MachineSet. | -| cluster.x-k8s.io/delete-machine | It marks control plane and worker nodes that will be given priority for deletion when KCP or a MachineSet scales down. It is given top priority on all delete policies. | -| cluster.x-k8s.io/cloned-from-name | It is the infrastructure machine annotation that stores the name of the infrastructure template resource that was cloned for the machine. This annotation is set only during cloning a template. Older/adopted machines will not have this annotation. | -| cluster.x-k8s.io/cloned-from-groupkind | It is the infrastructure machine annotation that stores the group-kind of the infrastructure template resource that was cloned for the machine. This annotation is set only during cloning a template. Older/adopted machines will not have this annotation. | -| cluster.x-k8s.io/skip-remediation | It is used to mark the machines that should not be considered for remediation by MachineHealthCheck reconciler. 
| -| cluster.x-k8s.io/managed-by | It can be applied to InfraCluster resources to signify that some external system is managing the cluster infrastructure. Provider InfraCluster controllers will ignore resources with this annotation. An external controller must fulfill the contract of the InfraCluster resource. External infrastructure providers should ensure that the annotation, once set, cannot be removed. | -| cluster.x-k8s.io/replicas-managed-by | It can be applied to MachinePool resources to signify that some external system is managing infrastructure scaling for that pool. See [the MachinePool documentation](../developer/architecture/controllers/machine-pool.md#externally-managed-autoscaler) for more details. | -| topology.cluster.x-k8s.io/defer-upgrade | It can be used to defer the Kubernetes upgrade of a single MachineDeployment topology. If the annotation is set on a MachineDeployment topology in Cluster.spec.topology.workers, the Kubernetes upgrade for this MachineDeployment topology is deferred. It doesn't affect other MachineDeployment topologies. | -| topology.cluster.x-k8s.io/dry-run | It is an annotation that gets set on objects by the topology controller only during a server side dry run apply operation. It is used for validating update webhooks for objects which get updated by template rotation (e.g. InfrastructureMachineTemplate). When the annotation is set and the admission request is a dry run, the webhook should deny validation due to immutability. By that the request will succeed (without any changes to the actual object because it is a dry run) and the topology controller will receive the resulting object. | -| topology.cluster.x-k8s.io/hold-upgrade-sequence | It can be used to hold the entire MachineDeployment upgrade sequence. If the annotation is set on a MachineDeployment topology in Cluster.spec.topology.workers, the Kubernetes upgrade for this MachineDeployment topology and all subsequent ones is deferred. | -| machine.cluster.x-k8s.io/certificates-expiry | It captures the expiry date of the machine certificates in RFC3339 format. It is used to trigger rollout of control plane machines before certificates expire. It can be set on BootstrapConfig and Machine objects. The value set on Machine object takes precedence. The annotation is only used by control plane machines. | -| machine.cluster.x-k8s.io/exclude-node-draining | It explicitly skips node draining if set. | -| machine.cluster.x-k8s.io/exclude-wait-for-node-volume-detach | It explicitly skips the waiting for node volume detaching if set. | -| pre-drain.delete.hook.machine.cluster.x-k8s.io | It specifies the prefix we search each annotation for during the pre-drain.delete lifecycle hook to pause reconciliation of deletion. These hooks will prevent removal of draining the associated node until all are removed. | -| pre-terminate.delete.hook.machine.cluster.x-k8s.io | It specifies the prefix we search each annotation for during the pre-terminate.delete lifecycle hook to pause reconciliation of deletion. These hooks will prevent removal of an instance from an infrastructure provider until all are removed. | -| machinedeployment.clusters.x-k8s.io/revision | It is the revision annotation of a machine deployment's machine sets which records its rollout sequence. | -| machinedeployment.clusters.x-k8s.io/revision-history | It maintains the history of all old revisions that a machine set has served for a machine deployment. 
| -| machinedeployment.clusters.x-k8s.io/desired-replicas | It is the desired replicas for a machine deployment recorded as an annotation in its machine sets. Helps in separating scaling events from the rollout process and for determining if the new machine set for a deployment is really saturated. | -| machinedeployment.clusters.x-k8s.io/max-replicas | It is the maximum replicas a deployment can have at a given point, which is machinedeployment.spec.replicas + maxSurge. Used by the underlying machine sets to estimate their proportions in case the deployment has surge replicas. | -| controlplane.cluster.x-k8s.io/skip-coredns | It explicitly skips reconciling CoreDNS if set. | -| controlplane.cluster.x-k8s.io/skip-kube-proxy | It explicitly skips reconciling kube-proxy if set. | -| controlplane.cluster.x-k8s.io/kubeadm-cluster-configuration | It is a machine annotation that stores the json-marshalled string of KCP ClusterConfiguration. This annotation is used to detect any changes in ClusterConfiguration and trigger machine rollout in KCP. | -| controlplane.cluster.x-k8s.io/remediation-in-progress | It is a KCP annotation that tracks that the system is in between having deleted an unhealthy machine and recreating its replacement. | -| controlplane.cluster.x-k8s.io/remediation-for | It is a machine annotation that links a new machine to the unhealthy machine it is replacing. | +| Annotation | Note | +| :--------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| clusterctl.cluster.x-k8s.io/skip-crd-name-preflight-check | Can be placed on provider CRDs, so that clusterctl doesn't emit an error if the CRD doesn't comply with Cluster APIs naming scheme. Only CRDs that are referenced by core Cluster API CRDs have to comply with the naming scheme. | +| clusterctl.cluster.x-k8s.io/delete-for-move | DeleteForMoveAnnotation will be set to objects that are going to be deleted from the source cluster after being moved to the target cluster during the clusterctl move operation. It will help any validation webhook to take decision based on it. | +| unsafe.topology.cluster.x-k8s.io/disable-update-class-name-check | It can be used to disable the webhook check on update that disallows a pre-existing Cluster to be populated with Topology information and Class. | +| cluster.x-k8s.io/cluster-name | It is set on nodes identifying the name of the cluster the node belongs to. | +| cluster.x-k8s.io/cluster-namespace | It is set on nodes identifying the namespace of the cluster the node belongs to. | +| cluster.x-k8s.io/machine | It is set on nodes identifying the machine the node belongs to. | +| cluster.x-k8s.io/owner-kind | It is set on nodes identifying the owner kind. | +| cluster.x-k8s.io/owner-name | It is set on nodes identifying the owner name. | +| cluster.x-k8s.io/paused | It can be applied to any Cluster API object to prevent a controller from processing a resource. 
Controllers working with Cluster API objects must check the existence of this annotation on the reconciled object. | +| cluster.x-k8s.io/disable-machine-create | It can be used to signal a MachineSet to stop creating new machines. It is utilized in the OnDelete MachineDeploymentStrategy to allow the MachineDeployment controller to scale down older MachineSets when Machines are deleted and add the new replicas to the latest MachineSet. | +| cluster.x-k8s.io/delete-machine | It marks control plane and worker nodes that will be given priority for deletion when KCP or a MachineSet scales down. It is given top priority on all delete policies. | +| cluster.x-k8s.io/cloned-from-name | It is the infrastructure machine annotation that stores the name of the infrastructure template resource that was cloned for the machine. This annotation is set only during cloning a template. Older/adopted machines will not have this annotation. | +| cluster.x-k8s.io/cloned-from-groupkind | It is the infrastructure machine annotation that stores the group-kind of the infrastructure template resource that was cloned for the machine. This annotation is set only during cloning a template. Older/adopted machines will not have this annotation. | +| cluster.x-k8s.io/skip-remediation | It is used to mark the machines that should not be considered for remediation by MachineHealthCheck reconciler. | +| cluster.x-k8s.io/managed-by | It can be applied to InfraCluster resources to signify that some external system is managing the cluster infrastructure. Provider InfraCluster controllers will ignore resources with this annotation. An external controller must fulfill the contract of the InfraCluster resource. External infrastructure providers should ensure that the annotation, once set, cannot be removed. | +| cluster.x-k8s.io/replicas-managed-by | It can be applied to MachinePool resources to signify that some external system is managing infrastructure scaling for that pool. See [the MachinePool documentation](../developer/architecture/controllers/machine-pool.md#externally-managed-autoscaler) for more details. | +| topology.cluster.x-k8s.io/defer-upgrade | It can be used to defer the Kubernetes upgrade of a single MachineDeployment topology. If the annotation is set on a MachineDeployment topology in Cluster.spec.topology.workers, the Kubernetes upgrade for this MachineDeployment topology is deferred. It doesn't affect other MachineDeployment topologies. | +| topology.cluster.x-k8s.io/dry-run | It is an annotation that gets set on objects by the topology controller only during a server side dry run apply operation. It is used for validating update webhooks for objects which get updated by template rotation (e.g. InfrastructureMachineTemplate). When the annotation is set and the admission request is a dry run, the webhook should deny validation due to immutability. By that the request will succeed (without any changes to the actual object because it is a dry run) and the topology controller will receive the resulting object. | +| topology.cluster.x-k8s.io/hold-upgrade-sequence | It can be used to hold the entire MachineDeployment upgrade sequence. If the annotation is set on a MachineDeployment topology in Cluster.spec.topology.workers, the Kubernetes upgrade for this MachineDeployment topology and all subsequent ones is deferred. | +| machine.cluster.x-k8s.io/certificates-expiry | It captures the expiry date of the machine certificates in RFC3339 format. 
It is used to trigger rollout of control plane machines before certificates expire. It can be set on BootstrapConfig and Machine objects. The value set on Machine object takes precedence. The annotation is only used by control plane machines. | +| machine.cluster.x-k8s.io/exclude-node-draining | It explicitly skips node draining if set. | +| machine.cluster.x-k8s.io/exclude-wait-for-node-volume-detach | It explicitly skips the waiting for node volume detaching if set. | +| pre-drain.delete.hook.machine.cluster.x-k8s.io | It specifies the prefix we search each annotation for during the pre-drain.delete lifecycle hook to pause reconciliation of deletion. These hooks will prevent removal of draining the associated node until all are removed. | +| pre-terminate.delete.hook.machine.cluster.x-k8s.io | It specifies the prefix we search each annotation for during the pre-terminate.delete lifecycle hook to pause reconciliation of deletion. These hooks will prevent removal of an instance from an infrastructure provider until all are removed. | +| machinedeployment.clusters.x-k8s.io/revision | It is the revision annotation of a machine deployment's machine sets which records its rollout sequence. | +| machinedeployment.clusters.x-k8s.io/revision-history | It maintains the history of all old revisions that a machine set has served for a machine deployment. | +| machinedeployment.clusters.x-k8s.io/desired-replicas | It is the desired replicas for a machine deployment recorded as an annotation in its machine sets. Helps in separating scaling events from the rollout process and for determining if the new machine set for a deployment is really saturated. | +| machinedeployment.clusters.x-k8s.io/max-replicas | It is the maximum replicas a deployment can have at a given point, which is machinedeployment.spec.replicas + maxSurge. Used by the underlying machine sets to estimate their proportions in case the deployment has surge replicas. | +| controlplane.cluster.x-k8s.io/skip-coredns | It explicitly skips reconciling CoreDNS if set. | +| controlplane.cluster.x-k8s.io/skip-kube-proxy | It explicitly skips reconciling kube-proxy if set. | +| controlplane.cluster.x-k8s.io/kubeadm-cluster-configuration | It is a machine annotation that stores the json-marshalled string of KCP ClusterConfiguration. This annotation is used to detect any changes in ClusterConfiguration and trigger machine rollout in KCP. | +| controlplane.cluster.x-k8s.io/remediation-in-progress | It is a KCP annotation that tracks that the system is in between having deleted an unhealthy machine and recreating its replacement. | +| controlplane.cluster.x-k8s.io/remediation-for | It is a machine annotation that links a new machine to the unhealthy machine it is replacing. | diff --git a/exp/internal/controllers/machinepool_controller.go b/exp/internal/controllers/machinepool_controller.go index 404865b7bbfb..55c9e3175e2a 100644 --- a/exp/internal/controllers/machinepool_controller.go +++ b/exp/internal/controllers/machinepool_controller.go @@ -367,3 +367,22 @@ func (r *MachinePoolReconciler) nodeToMachinePool(o client.Object) []reconcile.R return nil } + +// infraMachineToMachinePoolMapper is a mapper function that maps an InfraMachine to the MachinePool that owns it. +// This is used to trigger an update of the MachinePool when a InfraMachine is changed. 
+func infraMachineToMachinePoolMapper(o client.Object) []ctrl.Request { + labels := o.GetLabels() + + if poolName, ok := labels[clusterv1.MachinePoolNameLabel]; ok { + return []ctrl.Request{ + { + NamespacedName: client.ObjectKey{ + Namespace: o.GetNamespace(), + Name: poolName, + }, + }, + } + } + + return nil +} diff --git a/exp/internal/controllers/machinepool_controller_phases.go b/exp/internal/controllers/machinepool_controller_phases.go index cb60b4429569..c4885f6896d4 100644 --- a/exp/internal/controllers/machinepool_controller_phases.go +++ b/exp/internal/controllers/machinepool_controller_phases.go @@ -25,9 +25,12 @@ import ( "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/utils/pointer" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/source" @@ -36,10 +39,12 @@ import ( "sigs.k8s.io/cluster-api/controllers/external" capierrors "sigs.k8s.io/cluster-api/errors" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + capilabels "sigs.k8s.io/cluster-api/internal/labels" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" utilconversion "sigs.k8s.io/cluster-api/util/conversion" + "sigs.k8s.io/cluster-api/util/labels" "sigs.k8s.io/cluster-api/util/patch" ) @@ -284,6 +289,10 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, clu conditions.WithFallbackValue(ready, clusterv1.WaitingForInfrastructureFallbackReason, clusterv1.ConditionSeverityInfo, ""), ) + if err := r.reconcileMachinePoolMachines(ctx, mp, infraConfig); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile MachinePool machines for MachinePool `%s`", mp.Name) + } + if !mp.Status.InfrastructureReady { log.Info("Infrastructure provider is not ready, requeuing") return ctrl.Result{RequeueAfter: externalReadyWait}, nil @@ -317,3 +326,300 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, clu return ctrl.Result{}, nil } + +// reconcileMachinePoolMachines reconciles MachinePool Machines associated with a MachinePool. 
+func (r *MachinePoolReconciler) reconcileMachinePoolMachines(ctx context.Context, mp *expv1.MachinePool, infraMachinePool *unstructured.Unstructured) error { + log := ctrl.LoggerFrom(ctx) + + var noKind bool + var noSelector bool + + var infraMachineKind string + if err := util.UnstructuredUnmarshalField(infraMachinePool, &infraMachineKind, "status", "infrastructureMachineKind"); err != nil { + if errors.Is(err, util.ErrUnstructuredFieldNotFound) { + noKind = true + } else { + return errors.Wrapf(err, "failed to retrieve infraMachineKind from infrastructure provider for MachinePool %q in namespace %q", mp.Name, mp.Namespace) + } + } + + var infraMachineSelector metav1.LabelSelector + if err := util.UnstructuredUnmarshalField(infraMachinePool, &infraMachineSelector, "status", "infrastructureMachineSelector"); err != nil { + if errors.Is(err, util.ErrUnstructuredFieldNotFound) { + noSelector = true + } else { + return errors.Wrapf(err, "failed to retrieve infraMachineSelector from infrastructure provider for MachinePool %q in namespace %q", mp.Name, mp.Namespace) + } + } + + // If one missing but not both, return error + if noKind != noSelector { + return errors.Errorf("only one of infraMachineKind or infraMachineSelector found, both must be present") + } + + // If proper MachinePool Machines aren't supported by the InfraMachinePool, just create Machines to match the replica count. + if noKind && noSelector { + log.Info("No infraMachineKind or infraMachineSelector found, reconciling fallback machines") + return r.reconcileFallbackMachines(ctx, mp) + } + + log.Info("Reconciling MachinePool Machines", "infrastructureMachineKind", infraMachineKind, "infrastructureMachineSelector", infraMachineSelector) + var infraMachineList unstructured.UnstructuredList + + infraMachineList.SetAPIVersion(infraMachinePool.GetAPIVersion()) + infraMachineList.SetKind(infraMachineKind) + + if err := r.Client.List(ctx, &infraMachineList, client.InNamespace(mp.Namespace), client.MatchingLabels(infraMachineSelector.MatchLabels)); err != nil { + return errors.Wrapf(err, "failed to list infra machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace) + } + + for i := range infraMachineList.Items { + infraMachine := &infraMachineList.Items[i] + // Add watcher for external object, if there isn't one already. 
+ _, loaded := r.externalWatchers.LoadOrStore(infraMachine.GroupVersionKind().String(), struct{}{}) + if !loaded && r.controller != nil { + log.Info("Adding watcher on external object", "groupVersionKind", infraMachine.GroupVersionKind()) + err := r.controller.Watch( + &source.Kind{Type: infraMachine}, + // &handler.EnqueueRequestForOwner{OwnerType: &expv1.MachinePool{}}, + handler.EnqueueRequestsFromMapFunc(infraMachineToMachinePoolMapper), + ) + if err != nil { + r.externalWatchers.Delete(infraMachine.GroupVersionKind().String()) + return errors.Wrapf(err, "failed to add watcher on external object %q", infraMachine.GroupVersionKind()) + } + } + } + + machineList := &clusterv1.MachineList{} + labels := map[string]string{ + clusterv1.MachinePoolNameLabel: mp.Name, + clusterv1.ClusterNameLabel: mp.Spec.ClusterName, + } + + if err := r.Client.List(ctx, machineList, client.InNamespace(mp.Namespace), client.MatchingLabels(labels)); err != nil { + return err + } + + if err := r.deleteDanglingMachines(ctx, mp, machineList.Items); err != nil { + return errors.Wrapf(err, "failed to clean up orphan machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace) + } + + if err := r.createMachinesIfNotExists(ctx, mp, machineList.Items, infraMachineList.Items); err != nil { + return errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace) + } + + if err := r.setInfraMachineOwnerRefs(ctx, mp, infraMachineList.Items); err != nil { + return errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace) + } + + return nil +} + +// deleteDanglingMachines deletes all MachinePool Machines with an infraRef pointing to an infraMachine that no longer exists. +// This allows us to clean up Machines when the MachinePool is scaled down. +func (r *MachinePoolReconciler) deleteDanglingMachines(ctx context.Context, mp *expv1.MachinePool, machines []clusterv1.Machine) error { + log := ctrl.LoggerFrom(ctx) + + log.V(2).Info("Deleting orphaned machines", "machinePool", mp.Name, "namespace", mp.Namespace) + + for i := range machines { + machine := &machines[i] + infraRef := machine.Spec.InfrastructureRef + + // Check this here since external.Get() will complain if the infraRef is empty, i.e. no object kind or apiVersion. + if (infraRef == corev1.ObjectReference{}) { + // Until the InfraMachinePool populates its selector kind and selector labels, the MachinePool controller can't tell that it supports MachinePool Machines + // and will create fallback machines to begin with. When the selector fields gets populated, we want to clean up those fallback machines. 
+ log.V(2).Info("Machine has an empty infraRef, this a fallback machine, will delete", "machine", machine.Name, "namespace", machine.Namespace) + if err := r.Client.Delete(ctx, machine); err != nil { + return err + } + + continue + } + + _, err := external.Get(ctx, r.Client, &infraRef, mp.Namespace) + if err != nil { + if !apierrors.IsNotFound(err) { + log.Error(err, "unexpected error while trying to get infra machine, will delete anyway", "machine", machine.Name, "namespace", machine.Namespace) + } + + log.V(2).Info("Deleting orphaned machine", "machine", machine.Name, "namespace", machine.Namespace) + if err := r.Client.Delete(ctx, machine); err != nil { + return err + } + } else { + log.V(2).Info("Machine is not orphaned, nothing to do", "machine", machine.Name, "namespace", machine.Namespace) + } + } + + return nil +} + +// createMachinesIfNotExists creates a MachinePool Machine for each infraMachine if it doesn't already exist and sets the owner reference and infraRef. +func (r *MachinePoolReconciler) createMachinesIfNotExists(ctx context.Context, mp *expv1.MachinePool, machines []clusterv1.Machine, infraMachines []unstructured.Unstructured) error { + infraRefNames := sets.Set[string]{} + for _, machine := range machines { + infraRef := machine.Spec.InfrastructureRef + infraRefNames.Insert(infraRef.Name) + } + + for i := range infraMachines { + infraMachine := &infraMachines[i] + if infraRefNames.Has(infraMachine.GetName()) { + continue + } + + machine := getNewMachine(mp, infraMachine, false) + if err := r.Client.Create(ctx, machine); err != nil { + return errors.Wrapf(err, "failed to create new Machine for infraMachine %q in namespace %q", infraMachine.GetName(), infraMachine.GetNamespace()) + } + } + + return nil +} + +// setInfraMachineOwnerRefs creates a MachinePool Machine for each infraMachine if it doesn't already exist and sets the owner reference and infraRef. +func (r *MachinePoolReconciler) setInfraMachineOwnerRefs(ctx context.Context, mp *expv1.MachinePool, infraMachines []unstructured.Unstructured) error { + machineList := &clusterv1.MachineList{} + labels := map[string]string{ + clusterv1.MachinePoolNameLabel: mp.Name, + clusterv1.ClusterNameLabel: mp.Spec.ClusterName, + } + + if err := r.Client.List(ctx, machineList, client.InNamespace(mp.Namespace), client.MatchingLabels(labels)); err != nil { + return err + } + + infraMachineNameToMachine := make(map[string]clusterv1.Machine) + for _, machine := range machineList.Items { + infraRef := machine.Spec.InfrastructureRef + infraMachineNameToMachine[infraRef.Name] = machine + } + + for i := range infraMachines { + infraMachine := &infraMachines[i] + ownerRefs := infraMachine.GetOwnerReferences() + hasOwnerMachine := false + for _, ownerRef := range ownerRefs { + if ownerRef.Kind == "Machine" && ownerRef.APIVersion == clusterv1.GroupVersion.String() { + hasOwnerMachine = true + break + } + } + + if !hasOwnerMachine { + machine, ok := infraMachineNameToMachine[infraMachine.GetName()] + if !ok { + return errors.Errorf("failed to patch ownerRef for infraMachine %q because no Machine has an infraRef pointing to it", infraMachine.GetName()) + } + // Set the owner reference on the infraMachine to the Machine since the infraMachine is created and owned by the infraMachinePool. 
+ infraMachine.SetOwnerReferences([]metav1.OwnerReference{ + *metav1.NewControllerRef(&machine, machine.GroupVersionKind()), + }) + + patchHelper, err := patch.NewHelper(infraMachine, r.Client) + if err != nil { + return errors.Wrapf(err, "failed to create patch helper for infraMachine %q in namespace %q", infraMachine.GetName(), infraMachine.GetNamespace()) + } + + if err := patchHelper.Patch(ctx, infraMachine); err != nil { + return errors.Wrapf(err, "failed to patch infraMachine %q in namespace %q", infraMachine.GetName(), infraMachine.GetNamespace()) + } + } + } + + return nil +} + +// reconcileFallbackMachines creates Machines for InfraMachinePools that do not support MachinePool Machines. +// These Machines will contain only basic information and exist to make a more consistent user experience across all MachinePools and MachineDeployments and do not have an infraRef. +func (r *MachinePoolReconciler) reconcileFallbackMachines(ctx context.Context, mp *expv1.MachinePool) error { + machineList := &clusterv1.MachineList{} + labels := map[string]string{ + clusterv1.MachinePoolNameLabel: mp.Name, + clusterv1.ClusterNameLabel: mp.Spec.ClusterName, + } + + if err := r.Client.List(ctx, machineList, client.InNamespace(mp.Namespace), client.MatchingLabels(labels)); err != nil { + return err + } + + numMachines := len(machineList.Items) + + if mp.Spec.Replicas == nil { + return errors.New("MachinePool.Spec.Replicas is nil, this is unexpected") + } + + // If we have more machines than the desired number of replicas, delete machines until we have the correct number. + for numMachines > int(*mp.Spec.Replicas) { + machine := &machineList.Items[numMachines-1] + if err := r.Client.Delete(ctx, machine); err != nil { + return errors.Wrapf(err, "failed to delete Machine %q in namespace %q", machineList.Items[numMachines-1].Name, machineList.Items[numMachines-1].Namespace) + } + numMachines-- + } + + // If we have less machines than the desired number of replicas, create machines until we have the correct number. + for numMachines < int(*mp.Spec.Replicas) { + machine := getNewMachine(mp, nil, true) + if err := r.Client.Create(ctx, machine); err != nil { + return errors.Wrap(err, "failed to create new Machine") + } + numMachines++ + } + + return nil +} + +// getNewMachine creates a new Machine object. The name of the newly created resource is going +// to be created by the API server, we set the generateName field. +func getNewMachine(mp *expv1.MachinePool, infraMachine *unstructured.Unstructured, isFallback bool) *clusterv1.Machine { + infraRef := corev1.ObjectReference{} + if infraMachine != nil { + infraRef.APIVersion = infraMachine.GetAPIVersion() + infraRef.Kind = infraMachine.GetKind() + infraRef.Name = infraMachine.GetName() + infraRef.Namespace = infraMachine.GetNamespace() + } + + annotations := mp.Spec.Template.Annotations + if annotations == nil { + annotations = make(map[string]string) + } + + machine := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: fmt.Sprintf("%s-", mp.Name), + // Note: by setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine. 
+ OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(mp, mp.GroupVersionKind())}, + Namespace: mp.Namespace, + Labels: make(map[string]string), + Annotations: annotations, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: mp.Spec.ClusterName, + InfrastructureRef: infraRef, + }, + } + + // Set the labels from machinePool.Spec.Template.Labels as labels for the new Machine. + // Note: We can't just set `machinePool.Spec.Template.Labels` directly and thus "share" the labels + // map between Machine and machinePool.Spec.Template.Labels. This would mean that adding the + // MachinePoolNameLabel later on the Machine would also add the labels to machinePool.Spec.Template.Labels + // and thus modify the labels of the MachinePool. + for k, v := range mp.Spec.Template.Labels { + machine.Labels[k] = v + } + + // Enforce that the MachinePoolNameLabel and ClusterNameLabel are present on the Machine. + machine.Labels[clusterv1.MachinePoolNameLabel] = capilabels.MustFormatValue(mp.Name) + machine.Labels[clusterv1.ClusterNameLabel] = mp.Spec.ClusterName + + if isFallback { + labels.SetObjectLabel(machine, clusterv1.FallbackMachineLabel) + } + + return machine +} diff --git a/exp/internal/controllers/machinepool_controller_phases_test.go b/exp/internal/controllers/machinepool_controller_phases_test.go index dee57c28ed9e..79f808796e0c 100644 --- a/exp/internal/controllers/machinepool_controller_phases_test.go +++ b/exp/internal/controllers/machinepool_controller_phases_test.go @@ -33,12 +33,15 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/controllers/external" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/internal/test/builder" + "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/kubeconfig" ) const ( + timeout = time.Second * 30 clusterName = "test-cluster" wrongNamespace = "wrong-namespace" ) @@ -1013,6 +1016,486 @@ func TestReconcileMachinePoolInfrastructure(t *testing.T) { } } +func TestReconcileMachinePoolMachines(t *testing.T) { + defaultCluster := clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: metav1.NamespaceDefault, + }, + } + + defaultMachinePool := expv1.MachinePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machinepool-test", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: defaultCluster.Name, + }, + }, + Spec: expv1.MachinePoolSpec{ + ClusterName: defaultCluster.Name, + Replicas: pointer.Int32(2), + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + APIVersion: "bootstrap.cluster.x-k8s.io/v1beta1", + Kind: "BootstrapConfig", + Name: "bootstrap-config1", + }, + }, + InfrastructureRef: corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + Kind: "InfrastructureConfig", + Name: "infra-config1", + }, + }, + }, + }, + } + + infraMachine1 := unstructured.Unstructured{ + Object: map[string]interface{}{ + "kind": "InfrastructureMachine", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "infra-machine1", + "namespace": metav1.NamespaceDefault, + "labels": map[string]interface{}{ + clusterv1.ClusterNameLabel: defaultCluster.Name, + clusterv1.MachinePoolNameLabel: defaultMachinePool.Name, + }, + }, + }, + } + + infraMachine2 := unstructured.Unstructured{ + Object: map[string]interface{}{ + "kind": 
"InfrastructureMachine", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "infra-machine2", + "namespace": metav1.NamespaceDefault, + "labels": map[string]interface{}{ + clusterv1.ClusterNameLabel: defaultCluster.Name, + clusterv1.MachinePoolNameLabel: defaultMachinePool.Name, + }, + }, + }, + } + + machine1 := clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine1", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: defaultCluster.Name, + clusterv1.MachinePoolNameLabel: "machinepool-test", + }, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: clusterName, + InfrastructureRef: corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + Kind: "InfrastructureMachine", + Name: "infra-machine1", + Namespace: metav1.NamespaceDefault, + }, + }, + } + + machine2 := clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine2", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: defaultCluster.Name, + clusterv1.MachinePoolNameLabel: "machinepool-test", + }, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: clusterName, + InfrastructureRef: corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + Kind: "InfrastructureMachine", + Name: "infra-machine2", + Namespace: metav1.NamespaceDefault, + }, + }, + } + + machine3 := clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine3", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: defaultCluster.Name, + clusterv1.MachinePoolNameLabel: "machinepool-test", + clusterv1.FallbackMachineLabel: "true", + }, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: clusterName, + }, + } + + machine4 := clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine4", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: defaultCluster.Name, + clusterv1.MachinePoolNameLabel: "machinepool-test", + clusterv1.FallbackMachineLabel: "true", + }, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: clusterName, + }, + } + + machine5 := clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine5", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: defaultCluster.Name, + clusterv1.MachinePoolNameLabel: "machinepool-test", + clusterv1.FallbackMachineLabel: "true", + }, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: clusterName, + }, + } + + testCases := []struct { + name string + bootstrapConfig map[string]interface{} + infraConfig map[string]interface{} + machines []clusterv1.Machine + infraMachines []unstructured.Unstructured + machinepool *expv1.MachinePool + expectError bool + expectFallback bool + }{ + { + name: "two infra machines, should create two machinepool machines", + infraConfig: map[string]interface{}{ + "kind": "InfrastructureConfig", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "infra-config1", + "namespace": metav1.NamespaceDefault, + }, + "spec": map[string]interface{}{ + "providerIDList": []interface{}{ + "test://id-1", + }, + }, + "status": map[string]interface{}{ + "ready": true, + "addresses": []interface{}{ + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.1", + }, + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.2", + }, + }, + "infrastructureMachineKind": 
"InfrastructureMachine", + "infrastructureMachineSelector": map[string]interface{}{ + "matchLabels": map[string]interface{}{ + clusterv1.ClusterNameLabel: defaultCluster.Name, + clusterv1.MachinePoolNameLabel: defaultMachinePool.Name, + }, + }, + }, + }, + infraMachines: []unstructured.Unstructured{ + infraMachine1, + infraMachine2, + }, + expectError: false, + }, + { + name: "two infra machines and two machinepool machines, nothing to do", + infraConfig: map[string]interface{}{ + "kind": "InfrastructureConfig", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "infra-config1", + "namespace": metav1.NamespaceDefault, + }, + "spec": map[string]interface{}{ + "providerIDList": []interface{}{ + "test://id-1", + }, + }, + "status": map[string]interface{}{ + "ready": true, + "addresses": []interface{}{ + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.1", + }, + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.2", + }, + }, + "infrastructureMachineKind": "InfrastructureMachine", + "infrastructureMachineSelector": map[string]interface{}{ + "matchLabels": map[string]interface{}{ + clusterv1.ClusterNameLabel: defaultCluster.Name, + clusterv1.MachinePoolNameLabel: defaultMachinePool.Name, + }, + }, + }, + }, + machines: []clusterv1.Machine{ + machine1, + machine2, + }, + infraMachines: []unstructured.Unstructured{ + infraMachine1, + infraMachine2, + }, + expectError: false, + }, + { + name: "one extra machine, should delete", + infraConfig: map[string]interface{}{ + "kind": "InfrastructureConfig", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "infra-config1", + "namespace": metav1.NamespaceDefault, + }, + "spec": map[string]interface{}{ + "providerIDList": []interface{}{ + "test://id-1", + }, + }, + "status": map[string]interface{}{ + "ready": true, + "addresses": []interface{}{ + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.1", + }, + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.2", + }, + }, + "infrastructureMachineKind": "InfrastructureMachine", + "infrastructureMachineSelector": map[string]interface{}{ + "matchLabels": map[string]interface{}{ + clusterv1.ClusterNameLabel: defaultCluster.Name, + clusterv1.MachinePoolNameLabel: defaultMachinePool.Name, + }, + }, + }, + }, + machines: []clusterv1.Machine{ + machine1, + machine2, + }, + infraMachines: []unstructured.Unstructured{ + infraMachine1, + }, + expectError: false, + }, + { + name: "fallback machinepool, nothing to do", + infraConfig: map[string]interface{}{ + "kind": "InfrastructureConfig", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "infra-config1", + "namespace": metav1.NamespaceDefault, + }, + "spec": map[string]interface{}{ + "providerIDList": []interface{}{ + "test://id-1", + }, + }, + "status": map[string]interface{}{ + "ready": true, + "addresses": []interface{}{ + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.1", + }, + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.2", + }, + }, + }, + }, + machines: []clusterv1.Machine{ + machine3, + machine4, + }, + expectError: false, + expectFallback: true, + }, + { + name: "fallback machinepool, create one more machine", + infraConfig: map[string]interface{}{ + "kind": "InfrastructureConfig", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": 
map[string]interface{}{
+					"name":      "infra-config1",
+					"namespace": metav1.NamespaceDefault,
+				},
+				"spec": map[string]interface{}{
+					"providerIDList": []interface{}{
+						"test://id-1",
+					},
+				},
+				"status": map[string]interface{}{
+					"ready": true,
+					"addresses": []interface{}{
+						map[string]interface{}{
+							"type":    "InternalIP",
+							"address": "10.0.0.1",
+						},
+						map[string]interface{}{
+							"type":    "InternalIP",
+							"address": "10.0.0.2",
+						},
+					},
+				},
+			},
+			machines: []clusterv1.Machine{
+				machine3,
+			},
+			expectError:    false,
+			expectFallback: true,
+		},
+		{
+			name: "fallback machinepool, delete extra machine",
+			infraConfig: map[string]interface{}{
+				"kind":       "InfrastructureConfig",
+				"apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1",
+				"metadata": map[string]interface{}{
+					"name":      "infra-config1",
+					"namespace": metav1.NamespaceDefault,
+				},
+				"spec": map[string]interface{}{
+					"providerIDList": []interface{}{
+						"test://id-1",
+					},
+				},
+				"status": map[string]interface{}{
+					"ready": true,
+					"addresses": []interface{}{
+						map[string]interface{}{
+							"type":    "InternalIP",
+							"address": "10.0.0.1",
+						},
+						map[string]interface{}{
+							"type":    "InternalIP",
+							"address": "10.0.0.2",
+						},
+					},
+				},
+			},
+			machines: []clusterv1.Machine{
+				machine3,
+				machine4,
+				machine5,
+			},
+			expectError:    false,
+			expectFallback: true,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			g := NewWithT(t)
+
+			if tc.machinepool == nil {
+				tc.machinepool = defaultMachinePool.DeepCopy()
+			}
+
+			objs := []client.Object{defaultCluster.DeepCopy()}
+
+			infraConfig := &unstructured.Unstructured{Object: tc.infraConfig}
+
+			objs = append(objs, tc.machinepool, infraConfig.DeepCopy())
+
+			for _, infraMachine := range tc.infraMachines {
+				objs = append(objs, infraMachine.DeepCopy())
+			}
+
+			for _, machine := range tc.machines {
+				objs = append(objs, machine.DeepCopy())
+			}
+
+			r := &MachinePoolReconciler{
+				Client: fake.NewClientBuilder().WithObjects(objs...).Build(),
+			}
+
+			err := r.reconcileMachinePoolMachines(ctx, tc.machinepool, infraConfig)
+
+			r.reconcilePhase(tc.machinepool)
+			if tc.expectError {
+				g.Expect(err).ToNot(BeNil())
+			} else {
+				g.Expect(err).To(BeNil())
+
+				g.Eventually(func() bool {
+					machineList := &clusterv1.MachineList{}
+					labels := map[string]string{
+						clusterv1.ClusterNameLabel:     defaultCluster.Name,
+						clusterv1.MachinePoolNameLabel: tc.machinepool.Name,
+					}
+					if err := r.Client.List(ctx, machineList, client.InNamespace(tc.machinepool.Namespace), client.MatchingLabels(labels)); err != nil {
+						t.Log("Failed to list machines with error:", err)
+						return false
+					}
+
+					if tc.expectFallback {
+						if len(machineList.Items) != int(*tc.machinepool.Spec.Replicas) {
+							t.Logf("Machine list length %d != replicas %d", len(machineList.Items), *tc.machinepool.Spec.Replicas)
+
+							return false
+						}
+					} else {
+						if len(machineList.Items) != len(tc.infraMachines) {
+							t.Logf("Machine list length %d != infraMachine list length %d", len(machineList.Items), len(tc.infraMachines))
+
+							return false
+						}
+
+						for i := range machineList.Items {
+							machine := &machineList.Items[i]
+							infraMachine, err := external.Get(ctx, r.Client, &machine.Spec.InfrastructureRef, machine.Namespace)
+							if err != nil {
+								t.Log("Failed to get infraMachine with error:", err)
+								return false
+							}
+
+							if !util.IsControlledBy(infraMachine, machine) {
+								t.Log("InfraMachine is not controlled by machine")
+								return false
+							}
+						}
+					}
+
+					return true
+				}, timeout).Should(BeTrue())
+			}
+		})
+	}
+}
+
 func TestReconcileMachinePoolScaleToFromZero(t
*testing.T) { g := NewWithT(t) diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index 75d8b78a6ede..c926a27f1638 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -268,6 +268,8 @@ func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clust } func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + // If the machine is a stand-alone one, meaning not originated from a MachineDeployment, then set it as directly // owned by the Cluster (if not already present). if r.shouldAdopt(m) { @@ -279,6 +281,14 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, })) } + // We create fallback machines for InfraMachinePools that don't support MachinePool Machines. These machines don't point to any + // resources and exist to provide a consistent user experience for MachinePools, so we don't want to reconcile them. + if _, isFallbackMachine := m.Labels[clusterv1.FallbackMachineLabel]; isFallbackMachine { + log.Info("Skipping reconciliation for fallback machine") + // Do not reconcile if the machine is a fallback machine. + return ctrl.Result{}, nil + } + phases := []func(context.Context, *clusterv1.Cluster, *clusterv1.Machine) (ctrl.Result, error){ r.reconcileBootstrap, r.reconcileInfrastructure, @@ -306,6 +316,13 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) { //nolint:gocyclo log := ctrl.LoggerFrom(ctx) + if _, isFallbackMachine := m.Labels[clusterv1.FallbackMachineLabel]; isFallbackMachine { + log.Info("Skipping infra and bootstrap deletion for fallback machinepool machine") + + controllerutil.RemoveFinalizer(m, clusterv1.MachineFinalizer) + return ctrl.Result{}, nil + } + err := r.isDeleteNodeAllowed(ctx, cluster, m) isDeleteNodeAllowed := err == nil if err != nil { diff --git a/internal/controllers/machine/machine_controller_phases.go b/internal/controllers/machine/machine_controller_phases.go index 83c493afc0c4..0a0d7e5eacdb 100644 --- a/internal/controllers/machine/machine_controller_phases.go +++ b/internal/controllers/machine/machine_controller_phases.go @@ -245,13 +245,19 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, cluster *clust return ctrl.Result{}, err } if infraReconcileResult.RequeueAfter > 0 { - // Infra object went missing after the machine was up and running + _, isMachinePoolOwned := m.Labels[clusterv1.MachinePoolNameLabel] + + // Infra object went missing after the machine was up and running. Ignore if the machine is owned by a machine pool because the infraRef will be deleted by the infra machine pool controller. 
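+		// For MachinePool-owned Machines the infra object is expected to disappear on scale-down, so below we only reset InfrastructureReady instead of marking the Machine as failed.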
if m.Status.InfrastructureReady { - log.Error(err, "Machine infrastructure reference has been deleted after being ready, setting failure state") - m.Status.FailureReason = capierrors.MachineStatusErrorPtr(capierrors.InvalidConfigurationMachineError) - m.Status.FailureMessage = pointer.String(fmt.Sprintf("Machine infrastructure resource %v with name %q has been deleted after being ready", - m.Spec.InfrastructureRef.GroupVersionKind(), m.Spec.InfrastructureRef.Name)) - return ctrl.Result{}, errors.Errorf("could not find %v %q for Machine %q in namespace %q, requeuing", m.Spec.InfrastructureRef.GroupVersionKind().String(), m.Spec.InfrastructureRef.Name, m.Name, m.Namespace) + if isMachinePoolOwned { + m.Status.InfrastructureReady = false + } else { + log.Error(err, "Machine infrastructure reference has been deleted after being ready, setting failure state") + m.Status.FailureReason = capierrors.MachineStatusErrorPtr(capierrors.InvalidConfigurationMachineError) + m.Status.FailureMessage = pointer.String(fmt.Sprintf("Machine infrastructure resource %v with name %q has been deleted after being ready", + m.Spec.InfrastructureRef.GroupVersionKind(), m.Spec.InfrastructureRef.Name)) + return ctrl.Result{}, errors.Errorf("could not find %v %q for Machine %q in namespace %q, requeuing", m.Spec.InfrastructureRef.GroupVersionKind().String(), m.Spec.InfrastructureRef.Name, m.Name, m.Namespace) + } } return ctrl.Result{RequeueAfter: infraReconcileResult.RequeueAfter}, nil } diff --git a/test/infrastructure/docker/api/v1alpha3/conversion.go b/test/infrastructure/docker/api/v1alpha3/conversion.go index 6bdb64825187..e8962dee77c5 100644 --- a/test/infrastructure/docker/api/v1alpha3/conversion.go +++ b/test/infrastructure/docker/api/v1alpha3/conversion.go @@ -78,13 +78,30 @@ func (dst *DockerClusterList) ConvertFrom(srcRaw conversion.Hub) error { func (src *DockerMachine) ConvertTo(dstRaw conversion.Hub) error { dst := dstRaw.(*infrav1.DockerMachine) - return Convert_v1alpha3_DockerMachine_To_v1beta1_DockerMachine(src, dst, nil) + if err := Convert_v1alpha3_DockerMachine_To_v1beta1_DockerMachine(src, dst, nil); err != nil { + return err + } + + // Manually restore data. + restored := &infrav1.DockerMachine{} + if ok, err := utilconversion.UnmarshalData(src, restored); err != nil || !ok { + return err + } + + dst.Spec.InstanceName = restored.Spec.InstanceName + + return nil } func (dst *DockerMachine) ConvertFrom(srcRaw conversion.Hub) error { src := srcRaw.(*infrav1.DockerMachine) - return Convert_v1beta1_DockerMachine_To_v1alpha3_DockerMachine(src, dst, nil) + if err := Convert_v1beta1_DockerMachine_To_v1alpha3_DockerMachine(src, dst, nil); err != nil { + return err + } + + // Preserve Hub data on down-conversion except for metadata + return utilconversion.MarshalData(src, dst) } func (src *DockerMachineList) ConvertTo(dstRaw conversion.Hub) error { @@ -113,6 +130,7 @@ func (src *DockerMachineTemplate) ConvertTo(dstRaw conversion.Hub) error { } dst.Spec.Template.ObjectMeta = restored.Spec.Template.ObjectMeta + dst.Spec.Template.Spec.InstanceName = restored.Spec.Template.Spec.InstanceName return nil } @@ -154,3 +172,7 @@ func Convert_v1beta1_DockerMachineTemplateResource_To_v1alpha3_DockerMachineTemp // NOTE: custom conversion func is required because spec.template.metadata has been added in v1beta1. 
return autoConvert_v1beta1_DockerMachineTemplateResource_To_v1alpha3_DockerMachineTemplateResource(in, out, s) } + +func Convert_v1beta1_DockerMachineSpec_To_v1alpha3_DockerMachineSpec(in *infrav1.DockerMachineSpec, out *DockerMachineSpec, s apiconversion.Scope) error { + return autoConvert_v1beta1_DockerMachineSpec_To_v1alpha3_DockerMachineSpec(in, out, s) +} diff --git a/test/infrastructure/docker/api/v1alpha3/zz_generated.conversion.go b/test/infrastructure/docker/api/v1alpha3/zz_generated.conversion.go index d86ac186fff9..363a145af4d1 100644 --- a/test/infrastructure/docker/api/v1alpha3/zz_generated.conversion.go +++ b/test/infrastructure/docker/api/v1alpha3/zz_generated.conversion.go @@ -108,11 +108,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*v1beta1.DockerMachineSpec)(nil), (*DockerMachineSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1beta1_DockerMachineSpec_To_v1alpha3_DockerMachineSpec(a.(*v1beta1.DockerMachineSpec), b.(*DockerMachineSpec), scope) - }); err != nil { - return err - } if err := s.AddGeneratedConversionFunc((*DockerMachineStatus)(nil), (*v1beta1.DockerMachineStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha3_DockerMachineStatus_To_v1beta1_DockerMachineStatus(a.(*DockerMachineStatus), b.(*v1beta1.DockerMachineStatus), scope) }); err != nil { @@ -173,6 +168,11 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddConversionFunc((*v1beta1.DockerMachineSpec)(nil), (*DockerMachineSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta1_DockerMachineSpec_To_v1alpha3_DockerMachineSpec(a.(*v1beta1.DockerMachineSpec), b.(*DockerMachineSpec), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*v1beta1.DockerMachineTemplateResource)(nil), (*DockerMachineTemplateResource)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta1_DockerMachineTemplateResource_To_v1alpha3_DockerMachineTemplateResource(a.(*v1beta1.DockerMachineTemplateResource), b.(*DockerMachineTemplateResource), scope) }); err != nil { @@ -480,6 +480,7 @@ func Convert_v1alpha3_DockerMachineSpec_To_v1beta1_DockerMachineSpec(in *DockerM } func autoConvert_v1beta1_DockerMachineSpec_To_v1alpha3_DockerMachineSpec(in *v1beta1.DockerMachineSpec, out *DockerMachineSpec, s conversion.Scope) error { + // WARNING: in.InstanceName requires manual conversion: does not exist in peer-type out.ProviderID = (*string)(unsafe.Pointer(in.ProviderID)) out.CustomImage = in.CustomImage out.PreLoadImages = *(*[]string)(unsafe.Pointer(&in.PreLoadImages)) @@ -488,11 +489,6 @@ func autoConvert_v1beta1_DockerMachineSpec_To_v1alpha3_DockerMachineSpec(in *v1b return nil } -// Convert_v1beta1_DockerMachineSpec_To_v1alpha3_DockerMachineSpec is an autogenerated conversion function. 
-func Convert_v1beta1_DockerMachineSpec_To_v1alpha3_DockerMachineSpec(in *v1beta1.DockerMachineSpec, out *DockerMachineSpec, s conversion.Scope) error { - return autoConvert_v1beta1_DockerMachineSpec_To_v1alpha3_DockerMachineSpec(in, out, s) -} - func autoConvert_v1alpha3_DockerMachineStatus_To_v1beta1_DockerMachineStatus(in *DockerMachineStatus, out *v1beta1.DockerMachineStatus, s conversion.Scope) error { out.Ready = in.Ready out.LoadBalancerConfigured = in.LoadBalancerConfigured diff --git a/test/infrastructure/docker/api/v1alpha4/conversion.go b/test/infrastructure/docker/api/v1alpha4/conversion.go index a1b42d8f4d53..14e1324600df 100644 --- a/test/infrastructure/docker/api/v1alpha4/conversion.go +++ b/test/infrastructure/docker/api/v1alpha4/conversion.go @@ -96,13 +96,30 @@ func (dst *DockerClusterTemplateList) ConvertFrom(srcRaw conversion.Hub) error { func (src *DockerMachine) ConvertTo(dstRaw conversion.Hub) error { dst := dstRaw.(*infrav1.DockerMachine) - return Convert_v1alpha4_DockerMachine_To_v1beta1_DockerMachine(src, dst, nil) + if err := Convert_v1alpha4_DockerMachine_To_v1beta1_DockerMachine(src, dst, nil); err != nil { + return err + } + + // Manually restore data. + restored := &infrav1.DockerMachine{} + if ok, err := utilconversion.UnmarshalData(src, restored); err != nil || !ok { + return err + } + + dst.Spec.InstanceName = restored.Spec.InstanceName + + return nil } func (dst *DockerMachine) ConvertFrom(srcRaw conversion.Hub) error { src := srcRaw.(*infrav1.DockerMachine) - return Convert_v1beta1_DockerMachine_To_v1alpha4_DockerMachine(src, dst, nil) + if err := Convert_v1beta1_DockerMachine_To_v1alpha4_DockerMachine(src, dst, nil); err != nil { + return err + } + + // Preserve Hub data on down-conversion except for metadata + return utilconversion.MarshalData(src, dst) } func (src *DockerMachineList) ConvertTo(dstRaw conversion.Hub) error { @@ -131,6 +148,7 @@ func (src *DockerMachineTemplate) ConvertTo(dstRaw conversion.Hub) error { } dst.Spec.Template.ObjectMeta = restored.Spec.Template.ObjectMeta + dst.Spec.Template.Spec.InstanceName = restored.Spec.Template.Spec.InstanceName return nil } @@ -171,3 +189,7 @@ func Convert_v1beta1_DockerMachineTemplateResource_To_v1alpha4_DockerMachineTemp // NOTE: custom conversion func is required because spec.template.metadata has been added in v1beta1. 
return autoConvert_v1beta1_DockerMachineTemplateResource_To_v1alpha4_DockerMachineTemplateResource(in, out, s) } + +func Convert_v1beta1_DockerMachineSpec_To_v1alpha4_DockerMachineSpec(in *infrav1.DockerMachineSpec, out *DockerMachineSpec, s apiconversion.Scope) error { + return autoConvert_v1beta1_DockerMachineSpec_To_v1alpha4_DockerMachineSpec(in, out, s) +} diff --git a/test/infrastructure/docker/api/v1alpha4/zz_generated.conversion.go b/test/infrastructure/docker/api/v1alpha4/zz_generated.conversion.go index 9594641da789..650dcc92a355 100644 --- a/test/infrastructure/docker/api/v1alpha4/zz_generated.conversion.go +++ b/test/infrastructure/docker/api/v1alpha4/zz_generated.conversion.go @@ -158,11 +158,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*v1beta1.DockerMachineSpec)(nil), (*DockerMachineSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1beta1_DockerMachineSpec_To_v1alpha4_DockerMachineSpec(a.(*v1beta1.DockerMachineSpec), b.(*DockerMachineSpec), scope) - }); err != nil { - return err - } if err := s.AddGeneratedConversionFunc((*DockerMachineStatus)(nil), (*v1beta1.DockerMachineStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha4_DockerMachineStatus_To_v1beta1_DockerMachineStatus(a.(*DockerMachineStatus), b.(*v1beta1.DockerMachineStatus), scope) }); err != nil { @@ -233,6 +228,11 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddConversionFunc((*v1beta1.DockerMachineSpec)(nil), (*DockerMachineSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta1_DockerMachineSpec_To_v1alpha4_DockerMachineSpec(a.(*v1beta1.DockerMachineSpec), b.(*DockerMachineSpec), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*v1beta1.DockerMachineTemplateResource)(nil), (*DockerMachineTemplateResource)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta1_DockerMachineTemplateResource_To_v1alpha4_DockerMachineTemplateResource(a.(*v1beta1.DockerMachineTemplateResource), b.(*DockerMachineTemplateResource), scope) }); err != nil { @@ -686,6 +686,7 @@ func Convert_v1alpha4_DockerMachineSpec_To_v1beta1_DockerMachineSpec(in *DockerM } func autoConvert_v1beta1_DockerMachineSpec_To_v1alpha4_DockerMachineSpec(in *v1beta1.DockerMachineSpec, out *DockerMachineSpec, s conversion.Scope) error { + // WARNING: in.InstanceName requires manual conversion: does not exist in peer-type out.ProviderID = (*string)(unsafe.Pointer(in.ProviderID)) out.CustomImage = in.CustomImage out.PreLoadImages = *(*[]string)(unsafe.Pointer(&in.PreLoadImages)) @@ -694,11 +695,6 @@ func autoConvert_v1beta1_DockerMachineSpec_To_v1alpha4_DockerMachineSpec(in *v1b return nil } -// Convert_v1beta1_DockerMachineSpec_To_v1alpha4_DockerMachineSpec is an autogenerated conversion function. 
-func Convert_v1beta1_DockerMachineSpec_To_v1alpha4_DockerMachineSpec(in *v1beta1.DockerMachineSpec, out *DockerMachineSpec, s conversion.Scope) error { - return autoConvert_v1beta1_DockerMachineSpec_To_v1alpha4_DockerMachineSpec(in, out, s) -} - func autoConvert_v1alpha4_DockerMachineStatus_To_v1beta1_DockerMachineStatus(in *DockerMachineStatus, out *v1beta1.DockerMachineStatus, s conversion.Scope) error { out.Ready = in.Ready out.LoadBalancerConfigured = in.LoadBalancerConfigured diff --git a/test/infrastructure/docker/api/v1beta1/dockermachine_types.go b/test/infrastructure/docker/api/v1beta1/dockermachine_types.go index d99e30be5455..f798d56d73cc 100644 --- a/test/infrastructure/docker/api/v1beta1/dockermachine_types.go +++ b/test/infrastructure/docker/api/v1beta1/dockermachine_types.go @@ -30,6 +30,13 @@ const ( // DockerMachineSpec defines the desired state of DockerMachine. type DockerMachineSpec struct { + // InstanceName indicates the name of the Docker container associated with this DockerMachine. + // Since the provider ID is not set until after the container is online, this field is used to + // maintain the association. If it is not populated, the name of the container will be the same + // as the name of the owner Machine. + // +optional + InstanceName string `json:"instanceName,omitempty"` + // ProviderID will be the container name in ProviderID format (docker:////) // +optional ProviderID *string `json:"providerID,omitempty"` diff --git a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepools.yaml b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepools.yaml index 803e3f643776..9c8a868a317d 100644 --- a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepools.yaml +++ b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepools.yaml @@ -501,6 +501,56 @@ spec: - type type: object type: array + infrastructureMachineKind: + description: InfrastructureMachineKind is the kind of the infrastructure + resources behind MachinePool Machines. + type: string + infrastructureMachineSelector: + description: 'InfrastructureMachineSelector is a label query over + the infrastructure resources behind MachinePool Machines. More info: + https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors' + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the key + and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to + a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. + If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. 
A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object instances: description: Instances contains the status for each instance in the pool diff --git a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachines.yaml b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachines.yaml index fd6e74867836..5ac5c45d61a7 100644 --- a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachines.yaml +++ b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachines.yaml @@ -377,6 +377,13 @@ spec: type: boolean type: object type: array + instanceName: + description: InstanceName indicates the name of the Docker container + associated with this DockerMachine. Since the provider ID is not + set until after the container is online, this field is used to maintain + the association. If it is not populated, the name of the container + will be the same as the name of the owner Machine. + type: string preLoadImages: description: PreLoadImages allows to pre-load images in a newly created machine. This can be used to speed up tests by avoiding e.g. to diff --git a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinetemplates.yaml b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinetemplates.yaml index 5b5ca8dd2b66..a2d382e7a6cc 100644 --- a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinetemplates.yaml +++ b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinetemplates.yaml @@ -272,6 +272,14 @@ spec: type: boolean type: object type: array + instanceName: + description: InstanceName indicates the name of the Docker + container associated with this DockerMachine. Since the + provider ID is not set until after the container is online, + this field is used to maintain the association. If it is + not populated, the name of the container will be the same + as the name of the owner Machine. + type: string preLoadImages: description: PreLoadImages allows to pre-load images in a newly created machine. This can be used to speed up tests diff --git a/test/infrastructure/docker/exp/api/v1alpha3/conversion.go b/test/infrastructure/docker/exp/api/v1alpha3/conversion.go index 3eeb15387b8b..9816990b0c48 100644 --- a/test/infrastructure/docker/exp/api/v1alpha3/conversion.go +++ b/test/infrastructure/docker/exp/api/v1alpha3/conversion.go @@ -17,21 +17,41 @@ limitations under the License. package v1alpha3 import ( + apiconversion "k8s.io/apimachinery/pkg/conversion" "sigs.k8s.io/controller-runtime/pkg/conversion" infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1" + utilconversion "sigs.k8s.io/cluster-api/util/conversion" ) func (src *DockerMachinePool) ConvertTo(dstRaw conversion.Hub) error { dst := dstRaw.(*infraexpv1.DockerMachinePool) - return Convert_v1alpha3_DockerMachinePool_To_v1beta1_DockerMachinePool(src, dst, nil) + if err := Convert_v1alpha3_DockerMachinePool_To_v1beta1_DockerMachinePool(src, dst, nil); err != nil { + return err + } + + // Manually restore data. 
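+	// UnmarshalData reads the conversion-data annotation written by MarshalData on down-conversion; ok is false when the annotation is not present.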
+	restored := &infraexpv1.DockerMachinePool{}
+	if ok, err := utilconversion.UnmarshalData(src, restored); err != nil || !ok {
+		return err
+	}
+
+	dst.Status.InfrastructureMachineKind = restored.Status.InfrastructureMachineKind
+	dst.Status.InfrastructureMachineSelector = restored.Status.InfrastructureMachineSelector
+
+	return nil
 }
 
 func (dst *DockerMachinePool) ConvertFrom(srcRaw conversion.Hub) error {
 	src := srcRaw.(*infraexpv1.DockerMachinePool)
 
-	return Convert_v1beta1_DockerMachinePool_To_v1alpha3_DockerMachinePool(src, dst, nil)
+	if err := Convert_v1beta1_DockerMachinePool_To_v1alpha3_DockerMachinePool(src, dst, nil); err != nil {
+		return err
+	}
+
+	// Preserve Hub data on down-conversion except for metadata
+	return utilconversion.MarshalData(src, dst)
 }
 
 func (src *DockerMachinePoolList) ConvertTo(dstRaw conversion.Hub) error {
@@ -45,3 +65,8 @@ func (dst *DockerMachinePoolList) ConvertFrom(srcRaw conversion.Hub) error {
 
 	return Convert_v1beta1_DockerMachinePoolList_To_v1alpha3_DockerMachinePoolList(src, dst, nil)
 }
+
+func Convert_v1beta1_DockerMachinePoolStatus_To_v1alpha3_DockerMachinePoolStatus(in *infraexpv1.DockerMachinePoolStatus, out *DockerMachinePoolStatus, s apiconversion.Scope) error {
+	// NOTE: custom conversion func is required because status.infrastructureMachineSelector and status.infrastructureMachineKind have been added in v1beta1.
+	return autoConvert_v1beta1_DockerMachinePoolStatus_To_v1alpha3_DockerMachinePoolStatus(in, out, s)
+}
diff --git a/test/infrastructure/docker/exp/api/v1alpha3/zz_generated.conversion.go b/test/infrastructure/docker/exp/api/v1alpha3/zz_generated.conversion.go
index 311ad6ada301..2d140d5072c9 100644
--- a/test/infrastructure/docker/exp/api/v1alpha3/zz_generated.conversion.go
+++ b/test/infrastructure/docker/exp/api/v1alpha3/zz_generated.conversion.go
@@ -95,7 +95,7 @@ func RegisterConversions(s *runtime.Scheme) error {
 	}); err != nil {
 		return err
 	}
-	if err := s.AddGeneratedConversionFunc((*v1beta1.DockerMachinePoolStatus)(nil), (*DockerMachinePoolStatus)(nil), func(a, b interface{}, scope conversion.Scope) error {
+	if err := s.AddConversionFunc((*v1beta1.DockerMachinePoolStatus)(nil), (*DockerMachinePoolStatus)(nil), func(a, b interface{}, scope conversion.Scope) error {
 		return Convert_v1beta1_DockerMachinePoolStatus_To_v1alpha3_DockerMachinePoolStatus(a.(*v1beta1.DockerMachinePoolStatus), b.(*DockerMachinePoolStatus), scope)
 	}); err != nil {
 		return err
@@ -339,10 +339,7 @@ func autoConvert_v1beta1_DockerMachinePoolStatus_To_v1alpha3_DockerMachinePoolSt
 	} else {
 		out.Conditions = nil
 	}
+	// WARNING: in.InfrastructureMachineSelector requires manual conversion: does not exist in peer-type
+	// WARNING: in.InfrastructureMachineKind requires manual conversion: does not exist in peer-type
 	return nil
 }
-
-// Convert_v1beta1_DockerMachinePoolStatus_To_v1alpha3_DockerMachinePoolStatus is an autogenerated conversion function.
-func Convert_v1beta1_DockerMachinePoolStatus_To_v1alpha3_DockerMachinePoolStatus(in *v1beta1.DockerMachinePoolStatus, out *DockerMachinePoolStatus, s conversion.Scope) error {
-	return autoConvert_v1beta1_DockerMachinePoolStatus_To_v1alpha3_DockerMachinePoolStatus(in, out, s)
-}
diff --git a/test/infrastructure/docker/exp/api/v1alpha4/conversion.go b/test/infrastructure/docker/exp/api/v1alpha4/conversion.go
index acc200d9e51a..90a7cebadb20 100644
--- a/test/infrastructure/docker/exp/api/v1alpha4/conversion.go
+++ b/test/infrastructure/docker/exp/api/v1alpha4/conversion.go
@@ -17,21 +17,41 @@ limitations under the License.
 package v1alpha4
 
 import (
+	apiconversion "k8s.io/apimachinery/pkg/conversion"
 	"sigs.k8s.io/controller-runtime/pkg/conversion"
 
 	infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1"
+	utilconversion "sigs.k8s.io/cluster-api/util/conversion"
 )
 
 func (src *DockerMachinePool) ConvertTo(dstRaw conversion.Hub) error {
 	dst := dstRaw.(*infraexpv1.DockerMachinePool)
 
-	return Convert_v1alpha4_DockerMachinePool_To_v1beta1_DockerMachinePool(src, dst, nil)
+	if err := Convert_v1alpha4_DockerMachinePool_To_v1beta1_DockerMachinePool(src, dst, nil); err != nil {
+		return err
+	}
+
+	// Manually restore data.
+	restored := &infraexpv1.DockerMachinePool{}
+	if ok, err := utilconversion.UnmarshalData(src, restored); err != nil || !ok {
+		return err
+	}
+
+	dst.Status.InfrastructureMachineKind = restored.Status.InfrastructureMachineKind
+	dst.Status.InfrastructureMachineSelector = restored.Status.InfrastructureMachineSelector
+
+	return nil
 }
 
 func (dst *DockerMachinePool) ConvertFrom(srcRaw conversion.Hub) error {
 	src := srcRaw.(*infraexpv1.DockerMachinePool)
 
-	return Convert_v1beta1_DockerMachinePool_To_v1alpha4_DockerMachinePool(src, dst, nil)
+	if err := Convert_v1beta1_DockerMachinePool_To_v1alpha4_DockerMachinePool(src, dst, nil); err != nil {
+		return err
+	}
+
+	// Preserve Hub data on down-conversion except for metadata
+	return utilconversion.MarshalData(src, dst)
 }
 
 func (src *DockerMachinePoolList) ConvertTo(dstRaw conversion.Hub) error {
@@ -45,3 +65,8 @@ func (dst *DockerMachinePoolList) ConvertFrom(srcRaw conversion.Hub) error {
 
 	return Convert_v1beta1_DockerMachinePoolList_To_v1alpha4_DockerMachinePoolList(src, dst, nil)
 }
+
+func Convert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolStatus(in *infraexpv1.DockerMachinePoolStatus, out *DockerMachinePoolStatus, s apiconversion.Scope) error {
+	// NOTE: custom conversion func is required because status.infrastructureMachineSelector and status.infrastructureMachineKind have been added in v1beta1.
+	return autoConvert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolStatus(in, out, s)
+}
diff --git a/test/infrastructure/docker/exp/api/v1alpha4/zz_generated.conversion.go b/test/infrastructure/docker/exp/api/v1alpha4/zz_generated.conversion.go
index 4f302088997d..612a615db8a5 100644
--- a/test/infrastructure/docker/exp/api/v1alpha4/zz_generated.conversion.go
+++ b/test/infrastructure/docker/exp/api/v1alpha4/zz_generated.conversion.go
@@ -95,7 +95,7 @@ func RegisterConversions(s *runtime.Scheme) error {
 	}); err != nil {
 		return err
 	}
-	if err := s.AddGeneratedConversionFunc((*v1beta1.DockerMachinePoolStatus)(nil), (*DockerMachinePoolStatus)(nil), func(a, b interface{}, scope conversion.Scope) error {
+	if err := s.AddConversionFunc((*v1beta1.DockerMachinePoolStatus)(nil), (*DockerMachinePoolStatus)(nil), func(a, b interface{}, scope conversion.Scope) error {
 		return Convert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolStatus(a.(*v1beta1.DockerMachinePoolStatus), b.(*DockerMachinePoolStatus), scope)
 	}); err != nil {
 		return err
@@ -339,10 +339,7 @@ func autoConvert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolSt
 	} else {
 		out.Conditions = nil
 	}
+	// WARNING: in.InfrastructureMachineSelector requires manual conversion: does not exist in peer-type
+	// WARNING: in.InfrastructureMachineKind requires manual conversion: does not exist in peer-type
 	return nil
 }
-
-// Convert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolStatus is an autogenerated conversion function.
-func Convert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolStatus(in *v1beta1.DockerMachinePoolStatus, out *DockerMachinePoolStatus, s conversion.Scope) error {
-	return autoConvert_v1beta1_DockerMachinePoolStatus_To_v1alpha4_DockerMachinePoolStatus(in, out, s)
-}
diff --git a/test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_types.go b/test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_types.go
index 273475366b4b..055243311431 100644
--- a/test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_types.go
+++ b/test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_types.go
@@ -26,6 +26,9 @@ import (
 const (
 	// MachinePoolFinalizer allows ReconcileDockerMachinePool to clean up resources.
 	MachinePoolFinalizer = "dockermachinepool.infrastructure.cluster.x-k8s.io"
+
+	// DockerMachinePoolNameLabel is the label indicating the name of the DockerMachinePool a DockerMachine is controlled by.
+	DockerMachinePoolNameLabel = "dockermachinepool.infrastructure.cluster.x-k8s.io/pool-name"
 )
 
 // DockerMachinePoolMachineTemplate defines the desired state of DockerMachine.
@@ -82,6 +85,15 @@ type DockerMachinePoolStatus struct {
 	// Conditions defines current service state of the DockerMachinePool.
 	// +optional
 	Conditions clusterv1.Conditions `json:"conditions,omitempty"`
+
+	// InfrastructureMachineSelector is a label query over the infrastructure resources behind MachinePool Machines.
+	// More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+	// +optional
+	InfrastructureMachineSelector metav1.LabelSelector `json:"infrastructureMachineSelector,omitempty"`
+
+	// InfrastructureMachineKind is the kind of the infrastructure resources behind MachinePool Machines.
+	// +optional
+	InfrastructureMachineKind string `json:"infrastructureMachineKind,omitempty"`
 }
 
 // DockerMachinePoolInstanceStatus contains status information about a DockerMachinePool.
@@ -130,13 +142,13 @@ type DockerMachinePool struct {
 }
 
 // GetConditions returns the set of conditions for this object.
-func (c *DockerMachinePool) GetConditions() clusterv1.Conditions {
-	return c.Status.Conditions
+func (d *DockerMachinePool) GetConditions() clusterv1.Conditions {
+	return d.Status.Conditions
 }
 
 // SetConditions sets the conditions on this object.
-func (c *DockerMachinePool) SetConditions(conditions clusterv1.Conditions) {
-	c.Status.Conditions = conditions
+func (d *DockerMachinePool) SetConditions(conditions clusterv1.Conditions) {
+	d.Status.Conditions = conditions
 }
 
 // +kubebuilder:object:root=true
diff --git a/test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_webhook.go b/test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_webhook.go
index a6ffc3bbdaf1..50fdbb238fd4 100644
--- a/test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_webhook.go
+++ b/test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_webhook.go
@@ -20,8 +20,8 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 )
 
-func (c *DockerMachinePool) SetupWebhookWithManager(mgr ctrl.Manager) error {
+func (d *DockerMachinePool) SetupWebhookWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewWebhookManagedBy(mgr).
-		For(c).
+		For(d).
Complete() } diff --git a/test/infrastructure/docker/exp/api/v1beta1/zz_generated.deepcopy.go b/test/infrastructure/docker/exp/api/v1beta1/zz_generated.deepcopy.go index cf1e613a00d1..8a92065572e9 100644 --- a/test/infrastructure/docker/exp/api/v1beta1/zz_generated.deepcopy.go +++ b/test/infrastructure/docker/exp/api/v1beta1/zz_generated.deepcopy.go @@ -179,6 +179,7 @@ func (in *DockerMachinePoolStatus) DeepCopyInto(out *DockerMachinePoolStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + in.InfrastructureMachineSelector.DeepCopyInto(&out.InfrastructureMachineSelector) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DockerMachinePoolStatus. diff --git a/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go b/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go index ae4e090b9612..9c5dd9eae98f 100644 --- a/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go +++ b/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go @@ -24,6 +24,7 @@ import ( "github.com/pkg/errors" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/klog/v2" "k8s.io/utils/pointer" @@ -40,9 +41,11 @@ import ( expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" utilexp "sigs.k8s.io/cluster-api/exp/util" "sigs.k8s.io/cluster-api/test/infrastructure/container" + infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1" infraexpv1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/api/v1beta1" "sigs.k8s.io/cluster-api/test/infrastructure/docker/exp/internal/docker" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/predicates" ) @@ -107,6 +110,18 @@ func (r *DockerMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{}, err } + dockerMachinePool.Status.InfrastructureMachineSelector = metav1.LabelSelector{ + MatchLabels: map[string]string{ + clusterv1.ClusterNameLabel: cluster.Name, + infraexpv1.DockerMachinePoolNameLabel: dockerMachinePool.Name, + }, + } + dockerMachinePool.Status.InfrastructureMachineKind = "DockerMachine" + // Patch now so that the status and selectors are available. + if err := patchHelper.Patch(ctx, dockerMachinePool); err != nil { + return ctrl.Result{}, err + } + // Always attempt to Patch the DockerMachinePool object and status after each reconciliation. 
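+	// The Patch above makes Status.InfrastructureMachineSelector and Status.InfrastructureMachineKind visible to the MachinePool controller before the DockerMachines are reconciled; the deferred Patch below captures the status changes made during the rest of this reconciliation.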
defer func() { if err := patchDockerMachinePool(ctx, patchHelper, dockerMachinePool); err != nil { @@ -169,7 +184,40 @@ func (r *DockerMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr } func (r *DockerMachinePoolReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool) (ctrl.Result, error) { - pool, err := docker.NewNodePool(ctx, r.Client, cluster, machinePool, dockerMachinePool) + log := ctrl.LoggerFrom(ctx) + + dockerMachineList, err := getDockerMachines(ctx, r.Client, *dockerMachinePool) + if err != nil { + return ctrl.Result{}, err + } + + nodePoolInstances := make([]docker.NodePoolInstance, len(dockerMachineList.Items)) + for i, dockerMachine := range dockerMachineList.Items { + machine, err := util.GetOwnerMachine(ctx, r.Client, dockerMachine.ObjectMeta) + if err != nil { + log.V(2).Error(err, "failed to get owner machine, skipping") + continue + } + if machine == nil { + log.V(2).Info("owner machine not found, skipping", "docker-machine", dockerMachine.Name) + continue + } + + hasDeleteAnnotation := false + if machine.Annotations != nil { + _, hasDeleteAnnotation = machine.Annotations[clusterv1.DeleteMachineAnnotation] + } + + nodePoolInstances[i] = docker.NodePoolInstance{ + InstanceName: dockerMachine.Spec.InstanceName, + Bootstrapped: dockerMachine.Spec.Bootstrapped, + ProviderID: dockerMachine.Spec.ProviderID, + PrioritizeDelete: hasDeleteAnnotation, + Addresses: dockerMachine.Status.Addresses, + } + } + + pool, err := docker.NewNodePool(ctx, r.Client, cluster, machinePool, dockerMachinePool, nodePoolInstances) if err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to build new node pool") } @@ -195,7 +243,32 @@ func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, clust machinePool.Spec.Replicas = pointer.Int32(1) } - pool, err := docker.NewNodePool(ctx, r.Client, cluster, machinePool, dockerMachinePool) + dockerMachineList, err := getDockerMachines(ctx, r.Client, *dockerMachinePool) + if err != nil { + return ctrl.Result{}, err + } + + // Since nodepools don't persist the instances list, we need to construct it from the list of DockerMachines. 
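+	// The DeleteMachineAnnotation on the owner Machine is carried over as PrioritizeDelete so the node pool removes those instances first when scaling down.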
+ nodePoolInstances := make([]docker.NodePoolInstance, len(dockerMachineList.Items)) + for i, dockerMachine := range dockerMachineList.Items { + machine, err := util.GetOwnerMachine(ctx, r.Client, dockerMachine.ObjectMeta) + if err != nil { + log.V(2).Error(err, "Failed to get owner machine, skipping") + } + var hasDeleteAnnotation bool + if machine != nil { + _, hasDeleteAnnotation = machine.Annotations[clusterv1.DeleteMachineAnnotation] + } + + nodePoolInstances[i] = docker.NodePoolInstance{ + InstanceName: dockerMachine.Spec.InstanceName, + Bootstrapped: dockerMachine.Spec.Bootstrapped, + ProviderID: dockerMachine.Spec.ProviderID, + PrioritizeDelete: hasDeleteAnnotation, + } + } + + pool, err := docker.NewNodePool(ctx, r.Client, cluster, machinePool, dockerMachinePool, nodePoolInstances) if err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to build new node pool") } @@ -205,20 +278,35 @@ func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, clust if err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to generate workload cluster client") } + res, err := pool.ReconcileMachines(ctx, remoteClient) if err != nil { return res, err } + nodePoolInstancesResult := pool.GetNodePoolInstances() + // Derive info from Status.Instances dockerMachinePool.Spec.ProviderIDList = []string{} - for _, instance := range dockerMachinePool.Status.Instances { - if instance.ProviderID != nil && instance.Ready { + for _, instance := range nodePoolInstancesResult { + if instance.ProviderID != nil { dockerMachinePool.Spec.ProviderIDList = append(dockerMachinePool.Spec.ProviderIDList, *instance.ProviderID) } } - dockerMachinePool.Status.Replicas = int32(len(dockerMachinePool.Status.Instances)) + // Delete all DockerMachines that are not in the list of instances returned by the node pool. + if err := r.DeleteDanglingDockerMachines(ctx, dockerMachinePool, nodePoolInstancesResult); err != nil { + conditions.MarkFalse(dockerMachinePool, clusterv1.ReadyCondition, "FailedToDeleteOrphanedMachines", clusterv1.ConditionSeverityWarning, err.Error()) + return ctrl.Result{}, errors.Wrap(err, "failed to delete orphaned machines") + } + + // Create a DockerMachine for each instance returned by the node pool if it doesn't exist. + if err := r.CreateDockerMachinesIfNotExists(ctx, machinePool, dockerMachinePool, nodePoolInstancesResult); err != nil { + conditions.MarkFalse(dockerMachinePool, clusterv1.ReadyCondition, "FailedToCreateNewMachines", clusterv1.ConditionSeverityWarning, err.Error()) + return ctrl.Result{}, errors.Wrap(err, "failed to create missing machines") + } + + dockerMachinePool.Status.Replicas = int32(len(dockerMachineList.Items)) if dockerMachinePool.Spec.ProviderID == "" { // This is a fake provider ID which does not tie back to any docker infrastructure. 
In cloud providers, @@ -227,25 +315,129 @@ func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, clust dockerMachinePool.Spec.ProviderID = getDockerMachinePoolProviderID(cluster.Name, dockerMachinePool.Name) } - dockerMachinePool.Status.Ready = len(dockerMachinePool.Spec.ProviderIDList) == int(*machinePool.Spec.Replicas) + if len(dockerMachinePool.Spec.ProviderIDList) == int(*machinePool.Spec.Replicas) { + dockerMachinePool.Status.Ready = true + conditions.MarkTrue(dockerMachinePool, expv1.ReplicasReadyCondition) + + return ctrl.Result{}, nil + } + + dockerMachinePool.Status.Ready = false + conditions.MarkFalse(dockerMachinePool, expv1.ReplicasReadyCondition, expv1.WaitingForReplicasReadyReason, clusterv1.ConditionSeverityInfo, "") // if some machine is still provisioning, force reconcile in few seconds to check again infrastructure. - if !dockerMachinePool.Status.Ready && res.IsZero() { + if res.IsZero() { return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } + return res, nil } +func getDockerMachines(ctx context.Context, c client.Client, dockerMachinePool infraexpv1.DockerMachinePool) (*infrav1.DockerMachineList, error) { + dockerMachineList := &infrav1.DockerMachineList{} + labels := dockerMachinePool.Status.InfrastructureMachineSelector.MatchLabels + if err := c.List(ctx, dockerMachineList, client.InNamespace(dockerMachinePool.Namespace), client.MatchingLabels(labels)); err != nil { + return nil, err + } + + return dockerMachineList, nil +} + +// DeleteDanglingDockerMachines deletes any DockerMachines owned by the DockerMachinePool that reference an invalid providerID, i.e. not in the latest copy of the node pool instances. +func (r *DockerMachinePoolReconciler) DeleteDanglingDockerMachines(ctx context.Context, dockerMachinePool *infraexpv1.DockerMachinePool, instances []docker.NodePoolInstance) error { + log := ctrl.LoggerFrom(ctx) + log.V(2).Info("Deleting orphaned machines", "dockerMachinePool", dockerMachinePool.Name, "namespace", dockerMachinePool.Namespace, "instances", instances) + dockerMachineList, err := getDockerMachines(ctx, r.Client, *dockerMachinePool) + if err != nil { + return err + } + + log.V(2).Info("DockerMachineList kind is", "kind", dockerMachineList.GetObjectKind()) + + instanceNameSet := map[string]struct{}{} + for _, instance := range instances { + instanceNameSet[instance.InstanceName] = struct{}{} + } + + for i := range dockerMachineList.Items { + dockerMachine := &dockerMachineList.Items[i] + if _, ok := instanceNameSet[dockerMachine.Spec.InstanceName]; !ok { + log.V(2).Info("Deleting orphaned DockerMachine", "dockerMachine", dockerMachine.Name, "namespace", dockerMachine.Namespace) + if err := r.Client.Delete(ctx, dockerMachine); err != nil { + return err + } + } else { + log.V(2).Info("Keeping DockerMachine, nothing to do", "dockerMachine", dockerMachine.Name, "namespace", dockerMachine.Namespace) + } + } + + return nil +} + +// CreateDockerMachinesIfNotExists creates a DockerMachine for each instance returned by the node pool if it doesn't exist. 
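+// DockerMachines are matched to node pool instances by Spec.InstanceName; the MachinePool controller later creates the owner Machine for each DockerMachine created here.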
+func (r *DockerMachinePoolReconciler) CreateDockerMachinesIfNotExists(ctx context.Context, machinePool *expv1.MachinePool, dockerMachinePool *infraexpv1.DockerMachinePool, instances []docker.NodePoolInstance) error { + log := ctrl.LoggerFrom(ctx) + + log.V(2).Info("Creating missing machines", "dockerMachinePool", dockerMachinePool.Name, "namespace", dockerMachinePool.Namespace, "instances", instances) + + dockerMachineList, err := getDockerMachines(ctx, r.Client, *dockerMachinePool) + if err != nil { + return err + } + + instanceNameToDockerMachine := make(map[string]infrav1.DockerMachine) + for _, dockerMachine := range dockerMachineList.Items { + instanceNameToDockerMachine[dockerMachine.Spec.InstanceName] = dockerMachine + } + + for _, instance := range instances { + if _, exists := instanceNameToDockerMachine[instance.InstanceName]; exists { + continue + } + + labels := dockerMachinePool.Status.InfrastructureMachineSelector.MatchLabels + labels[clusterv1.MachinePoolNameLabel] = machinePool.Name + dockerMachine := &infrav1.DockerMachine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: dockerMachinePool.Namespace, + GenerateName: fmt.Sprintf("%s-", dockerMachinePool.Name), + Labels: labels, + Annotations: make(map[string]string), + // Note: This DockerMachine will be owned by the DockerMachinePool until the MachinePool controller creates its parent Machine. + }, + Spec: infrav1.DockerMachineSpec{ + InstanceName: instance.InstanceName, + }, + } + + log.V(2).Info("Instance name for dockerMachine is", "instanceName", instance.InstanceName, "dockerMachine", dockerMachine.Name) + + if err := r.Client.Create(ctx, dockerMachine); err != nil { + return errors.Wrap(err, "failed to create dockerMachine") + } + } + + return nil +} + func getDockerMachinePoolProviderID(clusterName, dockerMachinePoolName string) string { return fmt.Sprintf("docker:////%s-dmp-%s", clusterName, dockerMachinePoolName) } func patchDockerMachinePool(ctx context.Context, patchHelper *patch.Helper, dockerMachinePool *infraexpv1.DockerMachinePool) error { - // TODO: add conditions + conditions.SetSummary(dockerMachinePool, + conditions.WithConditions( + expv1.ReplicasReadyCondition, + ), + ) // Patch the object, ignoring conflicts on the conditions owned by this controller. return patchHelper.Patch( ctx, dockerMachinePool, + patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + clusterv1.ReadyCondition, + expv1.ReplicasReadyCondition, + }}, ) } diff --git a/test/infrastructure/docker/exp/internal/docker/nodepool.go b/test/infrastructure/docker/exp/internal/docker/nodepool.go index ba99ac4e2a45..7e18798156ab 100644 --- a/test/infrastructure/docker/exp/internal/docker/nodepool.go +++ b/test/infrastructure/docker/exp/internal/docker/nodepool.go @@ -22,6 +22,7 @@ import ( "encoding/base64" "fmt" "math/rand" + "sort" "strings" "time" @@ -55,16 +56,46 @@ type NodePool struct { dockerMachinePool *infraexpv1.DockerMachinePool labelFilters map[string]string machines []*docker.Machine + nodePoolInstances []NodePoolInstance // Note: This must be initialized when creating a new node pool and updated to reflect the `machines` slice. +} + +// NodePoolInstance is a representation of a node pool instance used to provide the information to construct DockerMachines. 
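+// It mirrors the bookkeeping previously kept in DockerMachinePoolStatus.Instances and adds PrioritizeDelete to honor the delete-machine annotation.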
+type NodePoolInstance struct { + InstanceName string + Bootstrapped bool + ProviderID *string + PrioritizeDelete bool + Addresses []clusterv1.MachineAddress + Ready bool +} + +func (np NodePool) Len() int { return len(np.machines) } +func (np NodePool) Swap(i, j int) { np.machines[i], np.machines[j] = np.machines[j], np.machines[i] } +func (np NodePool) Less(i, j int) bool { + var instanceI, instanceJ NodePoolInstance + for _, instance := range np.nodePoolInstances { + if instance.InstanceName == np.machines[i].Name() { + instanceI = instance + } + if instance.InstanceName == np.machines[j].Name() { + instanceJ = instance + } + } + if instanceI.PrioritizeDelete == instanceJ.PrioritizeDelete { + return instanceI.InstanceName < instanceJ.InstanceName + } + return instanceJ.PrioritizeDelete } // NewNodePool creates a new node pool instances. -func NewNodePool(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, mp *expv1.MachinePool, dmp *infraexpv1.DockerMachinePool) (*NodePool, error) { +func NewNodePool(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, mp *expv1.MachinePool, dmp *infraexpv1.DockerMachinePool, nodePoolInstances []NodePoolInstance) (*NodePool, error) { np := &NodePool{ client: c, cluster: cluster, machinePool: mp, dockerMachinePool: dmp, labelFilters: map[string]string{dockerMachinePoolLabel: dmp.Name}, + nodePoolInstances: nodePoolInstances, } if err := np.refresh(ctx); err != nil { @@ -73,6 +104,11 @@ func NewNodePool(ctx context.Context, c client.Client, cluster *clusterv1.Cluste return np, nil } +// GetNodePoolInstances returns the node pool instances providing the information to construct DockerMachines. +func (np *NodePool) GetNodePoolInstances() []NodePoolInstance { + return np.nodePoolInstances +} + // ReconcileMachines will build enough machines to satisfy the machine pool / docker machine pool spec // eventually delete all the machine in excess, and update the status for all the machines. // @@ -126,9 +162,9 @@ func (np *NodePool) ReconcileMachines(ctx context.Context, remoteClient client.C // First remove instance status for machines no longer existing, then reconcile the existing machines. // NOTE: the status is the only source of truth for understanding if the machine is already bootstrapped, ready etc. // so we are preserving the existing status and using it as a bases for the next reconcile machine. - instances := make([]infraexpv1.DockerMachinePoolInstanceStatus, 0, len(np.machines)) - for i := range np.dockerMachinePool.Status.Instances { - instance := np.dockerMachinePool.Status.Instances[i] + instances := make([]NodePoolInstance, 0, len(np.machines)) + for i := range np.nodePoolInstances { + instance := np.nodePoolInstances[i] for j := range np.machines { if instance.InstanceName == np.machines[j].Name() { instances = append(instances, instance) @@ -136,7 +172,7 @@ func (np *NodePool) ReconcileMachines(ctx context.Context, remoteClient client.C } } } - np.dockerMachinePool.Status.Instances = instances + np.nodePoolInstances = instances result := ctrl.Result{} for i := range np.machines { @@ -164,6 +200,8 @@ func (np *NodePool) Delete(ctx context.Context) error { } } + // Note: We can set `np.nodePoolInstances = nil` here, but it shouldn't be necessary on Delete(). 
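The Less implementation above is easy to misread: when the PrioritizeDelete flags differ it returns instanceJ.PrioritizeDelete, so instances flagged for deletion sort to the end of the machines slice, with ties broken by instance name for a deterministic order. Presumably that places them in the overflow range that gets scaled down first. A self-contained sketch of the same comparator (names are illustrative):

```go
// Illustrative only: reproduces the ordering implemented by NodePool.Less above.
package main

import (
	"fmt"
	"sort"
)

type instance struct {
	Name             string
	PrioritizeDelete bool
}

func main() {
	instances := []instance{
		{Name: "worker-c", PrioritizeDelete: false},
		{Name: "worker-a", PrioritizeDelete: true},
		{Name: "worker-b", PrioritizeDelete: false},
	}

	sort.Slice(instances, func(i, j int) bool {
		if instances[i].PrioritizeDelete == instances[j].PrioritizeDelete {
			return instances[i].Name < instances[j].Name
		}
		// When the flags differ, the unflagged instance sorts first, pushing
		// delete-prioritized instances to the tail of the slice.
		return instances[j].PrioritizeDelete
	})

	fmt.Println(instances) // [{worker-b false} {worker-c false} {worker-a true}]
}
```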
+ return nil } @@ -237,6 +275,9 @@ func (np *NodePool) refresh(ctx context.Context) error { np.machines = append(np.machines, machine) } } + + sort.Sort(np) + return nil } @@ -244,29 +285,28 @@ func (np *NodePool) refresh(ctx context.Context) error { func (np *NodePool) reconcileMachine(ctx context.Context, machine *docker.Machine, remoteClient client.Client) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) - var machineStatus infraexpv1.DockerMachinePoolInstanceStatus + var nodePoolInstance NodePoolInstance isFound := false - for _, instanceStatus := range np.dockerMachinePool.Status.Instances { - if instanceStatus.InstanceName == machine.Name() { - machineStatus = instanceStatus + for _, instance := range np.nodePoolInstances { + if instance.InstanceName == machine.Name() { + nodePoolInstance = instance isFound = true } } if !isFound { log.Info("Creating instance record", "instance", machine.Name()) - machineStatus = infraexpv1.DockerMachinePoolInstanceStatus{ + nodePoolInstance = NodePoolInstance{ InstanceName: machine.Name(), - Version: np.machinePool.Spec.Template.Spec.Version, } - np.dockerMachinePool.Status.Instances = append(np.dockerMachinePool.Status.Instances, machineStatus) + np.nodePoolInstances = append(np.nodePoolInstances, nodePoolInstance) // return to surface the new machine exists. return ctrl.Result{Requeue: true}, nil } defer func() { - for i, instanceStatus := range np.dockerMachinePool.Status.Instances { - if instanceStatus.InstanceName == machine.Name() { - np.dockerMachinePool.Status.Instances[i] = machineStatus + for i, instance := range np.nodePoolInstances { + if instance.InstanceName == machine.Name() { + np.nodePoolInstances[i] = instance } } }() @@ -277,7 +317,7 @@ func (np *NodePool) reconcileMachine(ctx context.Context, machine *docker.Machin } // if the machine isn't bootstrapped, only then run bootstrap scripts - if !machineStatus.Bootstrapped { + if !nodePoolInstance.Bootstrapped { timeoutCtx, cancel := context.WithTimeout(ctx, 3*time.Minute) defer cancel() @@ -305,13 +345,12 @@ func (np *NodePool) reconcileMachine(ctx context.Context, machine *docker.Machin return ctrl.Result{}, errors.Wrap(err, "failed to check for existence of bootstrap success file at /run/cluster-api/bootstrap-success.complete") } } - machineStatus.Bootstrapped = true // return to surface the machine has been bootstrapped. 
return ctrl.Result{Requeue: true}, nil } - if machineStatus.Addresses == nil { + if nodePoolInstance.Addresses == nil { log.Info("Fetching instance addresses", "instance", machine.Name()) // set address in machine status machineAddresses, err := externalMachine.Address(ctx) @@ -321,14 +360,14 @@ func (np *NodePool) reconcileMachine(ctx context.Context, machine *docker.Machin return ctrl.Result{Requeue: true}, nil //nolint:nilerr } - machineStatus.Addresses = []clusterv1.MachineAddress{ + nodePoolInstance.Addresses = []clusterv1.MachineAddress{ { Type: clusterv1.MachineHostName, Address: externalMachine.ContainerName(), }, } for _, addr := range machineAddresses { - machineStatus.Addresses = append(machineStatus.Addresses, + nodePoolInstance.Addresses = append(nodePoolInstance.Addresses, clusterv1.MachineAddress{ Type: clusterv1.MachineInternalIP, Address: addr, @@ -340,7 +379,7 @@ func (np *NodePool) reconcileMachine(ctx context.Context, machine *docker.Machin } } - if machineStatus.ProviderID == nil { + if nodePoolInstance.ProviderID == nil { log.Info("Fetching instance provider ID", "instance", machine.Name()) // Usually a cloud provider will do this, but there is no docker-cloud provider. // Requeue if there is an error, as this is likely momentary load balancer @@ -349,12 +388,8 @@ func (np *NodePool) reconcileMachine(ctx context.Context, machine *docker.Machin log.V(4).Info("transient error setting the provider id") return ctrl.Result{Requeue: true}, nil //nolint:nilerr } - // Set ProviderID so the Cluster API Machine Controller can pull it - providerID := externalMachine.ProviderID() - machineStatus.ProviderID = &providerID } - machineStatus.Ready = true return ctrl.Result{}, nil } diff --git a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go index 7e485f3e477b..ee3f1775985c 100644 --- a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go +++ b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go @@ -147,8 +147,20 @@ func (r *DockerMachineReconciler) Reconcile(ctx context.Context, req ctrl.Reques return ctrl.Result{}, nil } + if _, hasDeleteAnnotation := machine.Annotations[clusterv1.DeleteMachineAnnotation]; hasDeleteAnnotation { + if dockerMachine.Annotations == nil { + dockerMachine.Annotations = map[string]string{} + } + dockerMachine.Annotations[clusterv1.DeleteMachineAnnotation] = "true" + } + + if dockerMachine.Spec.InstanceName == "" { + log.V(2).Info("InstanceName not set for DockerMachine, defaulting to owner machine name", "machine", machine.Name) + dockerMachine.Spec.InstanceName = machine.Name + } + // Create a helper for managing the docker container hosting the machine. - externalMachine, err := docker.NewMachine(ctx, cluster, machine.Name, nil) + externalMachine, err := docker.NewMachine(ctx, cluster, dockerMachine.Spec.InstanceName, nil) if err != nil { return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalMachine") } @@ -211,12 +223,22 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * return ctrl.Result{}, nil } + // Be sure to include the MachinePool label in the DockerMachine along with the selector labels. + _, machinePoolOwned := dockerMachine.Labels[clusterv1.MachinePoolNameLabel] + // if the machine is already provisioned, return if dockerMachine.Spec.ProviderID != nil { // ensure ready state is set. 
// This is required after move, because status is not moved to the target cluster. dockerMachine.Status.Ready = true + // We only create a DockerMachinePoolMachine if the providerID is set, which implies that the bootstrap has succeeded. + // TODO: verify this logic + if machinePoolOwned { + log.V(2).Info("Marking bootstrap true for machine pool owned machine") + conditions.MarkTrue(dockerMachine, infrav1.BootstrapExecSucceededCondition) + } + if externalMachine.Exists() { conditions.MarkTrue(dockerMachine, infrav1.ContainerProvisionedCondition) // Setting machine address is required after move, because status.Address field is not retained during move. @@ -229,51 +251,52 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * return ctrl.Result{}, nil } - // Make sure bootstrap data is available and populated. - if machine.Spec.Bootstrap.DataSecretName == nil { - if !util.IsControlPlaneMachine(machine) && !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { - log.Info("Waiting for the control plane to be initialized") - conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition, clusterv1.WaitingForControlPlaneAvailableReason, clusterv1.ConditionSeverityInfo, "") + if !machinePoolOwned { + // Make sure bootstrap data is available and populated. + if machine.Spec.Bootstrap.DataSecretName == nil { + if !util.IsControlPlaneMachine(machine) && !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) { + log.Info("Waiting for the control plane to be initialized") + conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition, clusterv1.WaitingForControlPlaneAvailableReason, clusterv1.ConditionSeverityInfo, "") + return ctrl.Result{}, nil + } + + log.Info("Waiting for the Bootstrap provider controller to set bootstrap data") + conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "") return ctrl.Result{}, nil } - log.Info("Waiting for the Bootstrap provider controller to set bootstrap data") - conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "") - return ctrl.Result{}, nil - } - - // Create the docker container hosting the machine - role := constants.WorkerNodeRoleValue - if util.IsControlPlaneMachine(machine) { - role = constants.ControlPlaneNodeRoleValue - } + // Create the docker container hosting the machine + role := constants.WorkerNodeRoleValue + if util.IsControlPlaneMachine(machine) { + role = constants.ControlPlaneNodeRoleValue + } - // Create the machine if not existing yet - if !externalMachine.Exists() { - // NOTE: FailureDomains don't mean much in CAPD since it's all local, but we are setting a label on - // each container, so we can check placement. - if err := externalMachine.Create(ctx, dockerMachine.Spec.CustomImage, role, machine.Spec.Version, docker.FailureDomainLabel(machine.Spec.FailureDomain), dockerMachine.Spec.ExtraMounts); err != nil { - return ctrl.Result{}, errors.Wrap(err, "failed to create worker DockerMachine") + // Create the machine if not existing yet + if !machinePoolOwned && !externalMachine.Exists() { + // NOTE: FailureDomains don't mean much in CAPD since it's all local, but we are setting a label on + // each container, so we can check placement. 
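For clarity, the machinePoolOwned gate introduced above is driven purely by the presence of the MachinePool name label on the DockerMachine. A minimal, hypothetical helper expressing the same check (not part of this patch):

```go
package controllers

import (
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
)

// isMachinePoolOwned is a hypothetical helper: it reports whether a DockerMachine is managed
// through a MachinePool, i.e. its container is created and bootstrapped by the
// DockerMachinePool controller rather than by the DockerMachine reconciler.
func isMachinePoolOwned(dockerMachine *infrav1.DockerMachine) bool {
	_, ok := dockerMachine.GetLabels()[clusterv1.MachinePoolNameLabel]
	return ok
}
```

When the label is present, the reconciler skips waiting for a bootstrap secret, skips container creation, and (further down) skips ExecBootstrap, since the DockerMachinePool controller already created and bootstrapped the container through the node pool; it still verifies the bootstrap sentinel file and surfaces the container-provisioned condition.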
+ if err := externalMachine.Create(ctx, dockerMachine.Spec.CustomImage, role, machine.Spec.Version, docker.FailureDomainLabel(machine.Spec.FailureDomain), dockerMachine.Spec.ExtraMounts); err != nil { + return ctrl.Result{}, errors.Wrap(err, "failed to create worker DockerMachine") + } } - } - // Preload images into the container - if len(dockerMachine.Spec.PreLoadImages) > 0 { - if err := externalMachine.PreloadLoadImages(ctx, dockerMachine.Spec.PreLoadImages); err != nil { - return ctrl.Result{}, errors.Wrap(err, "failed to pre-load images into the DockerMachine") + // Preload images into the container + if len(dockerMachine.Spec.PreLoadImages) > 0 { + if err := externalMachine.PreloadLoadImages(ctx, dockerMachine.Spec.PreLoadImages); err != nil { + return ctrl.Result{}, errors.Wrap(err, "failed to pre-load images into the DockerMachine") + } } - } - // if the machine is a control plane update the load balancer configuration - // we should only do this once, as reconfiguration more or less ensures - // node ref setting fails - if util.IsControlPlaneMachine(machine) && !dockerMachine.Status.LoadBalancerConfigured { - if err := externalLoadBalancer.UpdateConfiguration(ctx); err != nil { - return ctrl.Result{}, errors.Wrap(err, "failed to update DockerCluster.loadbalancer configuration") + // if the machine is a control plane update the load balancer configuration + // we should only do this once, as reconfiguration more or less ensures + // node ref setting fails + if util.IsControlPlaneMachine(machine) && !dockerMachine.Status.LoadBalancerConfigured { + if err := externalLoadBalancer.UpdateConfiguration(ctx); err != nil { + return ctrl.Result{}, errors.Wrap(err, "failed to update DockerCluster.loadbalancer configuration") + } + dockerMachine.Status.LoadBalancerConfigured = true } - dockerMachine.Status.LoadBalancerConfigured = true } - // Update the ContainerProvisionedCondition condition // NOTE: it is required to create the patch helper before this change otherwise it wont surface if // we issue a patch down in the code (because if we create patch helper after this point the ContainerProvisionedCondition=True exists both on before and after). @@ -304,38 +327,41 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * // but bootstrapped is never set on the object. We only try to bootstrap if the machine // is not already bootstrapped. if err := externalMachine.CheckForBootstrapSuccess(timeoutCtx, false); err != nil { - bootstrapData, format, err := r.getBootstrapData(timeoutCtx, machine) - if err != nil { - return ctrl.Result{}, err - } + if !machinePoolOwned { + bootstrapData, format, err := r.getBootstrapData(timeoutCtx, machine) + if err != nil { + return ctrl.Result{}, err + } - // Setup a go routing to check for the machine being deleted while running bootstrap as a - // synchronous process, e.g. due to remediation. The routine stops when timeoutCtx is Done - // (either because canceled intentionally due to machine deletion or canceled by the defer cancel() - // call when exiting from this func). 
- go func() { - for { - select { - case <-timeoutCtx.Done(): - return - default: - updatedDockerMachine := &infrav1.DockerMachine{} - if err := r.Client.Get(ctx, client.ObjectKeyFromObject(dockerMachine), updatedDockerMachine); err == nil && - !updatedDockerMachine.DeletionTimestamp.IsZero() { - log.Info("Cancelling Bootstrap because the underlying machine has been deleted") - cancel() + // Setup a go routing to check for the machine being deleted while running bootstrap as a + // synchronous process, e.g. due to remediation. The routine stops when timeoutCtx is Done + // (either because canceled intentionally due to machine deletion or canceled by the defer cancel() + // call when exiting from this func). + go func() { + for { + select { + case <-timeoutCtx.Done(): return + default: + updatedDockerMachine := &infrav1.DockerMachine{} + if err := r.Client.Get(ctx, client.ObjectKeyFromObject(dockerMachine), updatedDockerMachine); err == nil && + !updatedDockerMachine.DeletionTimestamp.IsZero() { + log.Info("Cancelling Bootstrap because the underlying machine has been deleted") + cancel() + return + } + time.Sleep(5 * time.Second) } - time.Sleep(5 * time.Second) } - } - }() + }() - // Run the bootstrap script. Simulates cloud-init/Ignition. - if err := externalMachine.ExecBootstrap(timeoutCtx, bootstrapData, format); err != nil { - conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap") - return ctrl.Result{}, errors.Wrap(err, "failed to exec DockerMachine bootstrap") + // Run the bootstrap script. Simulates cloud-init/Ignition. + if err := externalMachine.ExecBootstrap(timeoutCtx, bootstrapData, format); err != nil { + conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap") + return ctrl.Result{}, errors.Wrap(err, "failed to exec DockerMachine bootstrap") + } } + // Check for bootstrap success if err := externalMachine.CheckForBootstrapSuccess(timeoutCtx, true); err != nil { conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap") diff --git a/util/labels/helpers.go b/util/labels/helpers.go index 848ec37cdf40..43f7c72e902f 100644 --- a/util/labels/helpers.go +++ b/util/labels/helpers.go @@ -37,3 +37,14 @@ func HasWatchLabel(o metav1.Object, labelValue string) bool { } return val == labelValue } + +// SetObjectLabel sets the given key to "true" on the object's labels. 
+func SetObjectLabel(o metav1.Object, key string) { + labels := o.GetLabels() + if labels == nil { + labels = make(map[string]string) + } + labels[key] = "true" + + o.SetLabels(labels) +} diff --git a/util/labels/helpers_test.go b/util/labels/helpers_test.go index d627014800f2..86677e2cf7b6 100644 --- a/util/labels/helpers_test.go +++ b/util/labels/helpers_test.go @@ -84,3 +84,48 @@ func TestHasWatchLabel(t *testing.T) { }) } } + +func TestSetObjectLabel(t *testing.T) { + g := NewWithT(t) + + var testcases = []struct { + name string + obj metav1.Object + }{ + { + name: "labels are nil map, set label", + obj: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{}, + }, + }, + { + name: "labels are not nil, set label", + obj: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{}, + }, + }, + }, + { + name: "label exists in map, overwrite label", + obj: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + clusterv1.ClusterTopologyOwnedLabel: "random-value", + }, + }, + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + SetObjectLabel(tc.obj, clusterv1.ClusterTopologyOwnedLabel) + labels := tc.obj.GetLabels() + + val, ok := labels[clusterv1.ClusterTopologyOwnedLabel] + g.Expect(ok).To(BeTrue()) + g.Expect(val).To(Equal("true")) + }) + } +}
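A short usage sketch for the new helper, mirroring the test above; the import paths assume the in-repo module layout.

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/labels"
)

func main() {
	node := &corev1.Node{}

	// SetObjectLabel initializes the label map if needed and always stores the value "true".
	labels.SetObjectLabel(node, clusterv1.ClusterTopologyOwnedLabel)

	fmt.Println(node.Labels[clusterv1.ClusterTopologyOwnedLabel]) // prints: true
}
```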