✨ machine: Introduce Deletion status field and add timestamps for drain and volumeDetach instead of using the condition (#11166)

* machine: Introduce Deletion status field and add timestamps for drain and volumeDetach instead of using the condition

* fix tests

* make generate

* review fixes

* fix openapi gen

* review fixes

* fix
chrischdi authored Sep 30, 2024
1 parent 65fbabe commit f8b7deb
Showing 10 changed files with 150 additions and 55 deletions.
22 changes: 22 additions & 0 deletions api/v1beta1/machine_types.go
@@ -230,10 +230,32 @@ type MachineStatus struct {
 	// Conditions defines current service state of the Machine.
 	// +optional
 	Conditions Conditions `json:"conditions,omitempty"`
+
+	// deletion contains information relating to removal of the Machine.
+	// Only present when the Machine has a deletionTimestamp and drain or wait for volume detach started.
+	// +optional
+	Deletion *MachineDeletionStatus `json:"deletion,omitempty"`
 }
 
 // ANCHOR_END: MachineStatus
 
+// MachineDeletionStatus is the deletion state of the Machine.
+type MachineDeletionStatus struct {
+	// nodeDrainStartTime is the time when the drain of the node started and is used to determine
+	// if the NodeDrainTimeout is exceeded.
+	// Only present when the Machine has a deletionTimestamp and draining the node had been started.
+	// +optional
+	NodeDrainStartTime *metav1.Time `json:"nodeDrainStartTime,omitempty"`
+
+	// waitForNodeVolumeDetachStartTime is the time when waiting for volume detachment started
+	// and is used to determine if the NodeVolumeDetachTimeout is exceeded.
+	// Detaching volumes from nodes is usually done by CSI implementations and the current state
+	// is observed from the node's `.Status.VolumesAttached` field.
+	// Only present when the Machine has a deletionTimestamp and waiting for volume detachments had been started.
+	// +optional
+	WaitForNodeVolumeDetachStartTime *metav1.Time `json:"waitForNodeVolumeDetachStartTime,omitempty"`
+}
+
 // SetTypedPhase sets the Phase field to the string representation of MachinePhase.
 func (m *MachineStatus) SetTypedPhase(p MachinePhase) {
 	m.Phase = string(p)
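
For illustration, here is a minimal, self-contained sketch (not part of the commit) of how the new status block serializes, assuming the sigs.k8s.io/cluster-api/api/v1beta1 package at this revision; the timestamp values are made up:

package main

import (
	"encoding/json"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/ptr"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

func main() {
	// Populate the new Deletion block the way the delete flow would
	// (see machine_controller.go below); values are illustrative.
	status := clusterv1.MachineStatus{
		Deletion: &clusterv1.MachineDeletionStatus{
			NodeDrainStartTime:               ptr.To(metav1.Now()),
			WaitForNodeVolumeDetachStartTime: ptr.To(metav1.Now()),
		},
	}

	// The block appears under "deletion" with the lowercase JSON names
	// from the struct tags above, e.g.
	// "deletion":{"nodeDrainStartTime":"...","waitForNodeVolumeDetachStartTime":"..."}
	out, err := json.Marshal(status)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
}
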
28 changes: 28 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

36 changes: 35 additions & 1 deletion api/v1beta1/zz_generated.openapi.go

Some generated files are not rendered by default.

22 changes: 22 additions & 0 deletions config/crd/bases/cluster.x-k8s.io_machines.yaml

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions internal/apis/core/v1alpha3/conversion.go
@@ -101,6 +101,7 @@ func (src *Machine) ConvertTo(dstRaw conversion.Hub) error {
 	dst.Spec.NodeVolumeDetachTimeout = restored.Spec.NodeVolumeDetachTimeout
 	dst.Status.NodeInfo = restored.Status.NodeInfo
 	dst.Status.CertificatesExpiryDate = restored.Status.CertificatesExpiryDate
+	dst.Status.Deletion = restored.Status.Deletion
 	return nil
 }

1 change: 1 addition & 0 deletions internal/apis/core/v1alpha3/zz_generated.conversion.go

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions internal/apis/core/v1alpha4/conversion.go
@@ -190,6 +190,7 @@ func (src *Machine) ConvertTo(dstRaw conversion.Hub) error {
 	dst.Spec.NodeDeletionTimeout = restored.Spec.NodeDeletionTimeout
 	dst.Status.CertificatesExpiryDate = restored.Status.CertificatesExpiryDate
 	dst.Spec.NodeVolumeDetachTimeout = restored.Spec.NodeVolumeDetachTimeout
+	dst.Status.Deletion = restored.Status.Deletion
 	return nil
 }

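
Both one-line conversion changes follow the usual CAPI round-trip pattern: status.deletion exists only in the hub (v1beta1) API, so an up-conversion restores it from data a previous down-conversion stashed in an annotation. A condensed sketch of that pattern for v1alpha4 (assuming the generated Convert_v1alpha4_Machine_To_v1beta1_Machine and utilconversion.UnmarshalData from sigs.k8s.io/cluster-api/util/conversion; the real ConvertTo restores many more fields):

package v1alpha4

import (
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	utilconversion "sigs.k8s.io/cluster-api/util/conversion"
	"sigs.k8s.io/controller-runtime/pkg/conversion"
)

func (src *Machine) ConvertTo(dstRaw conversion.Hub) error {
	dst := dstRaw.(*clusterv1.Machine)
	if err := Convert_v1alpha4_Machine_To_v1beta1_Machine(src, dst, nil); err != nil {
		return err
	}

	// Recover hub-only fields that ConvertFrom previously serialized into
	// an annotation, then copy over the field this commit adds.
	restored := &clusterv1.Machine{}
	if ok, err := utilconversion.UnmarshalData(src, restored); err != nil || !ok {
		return err
	}
	dst.Status.Deletion = restored.Status.Deletion

	return nil
}
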
1 change: 1 addition & 0 deletions internal/apis/core/v1alpha4/zz_generated.conversion.go

Some generated files are not rendered by default.

45 changes: 27 additions & 18 deletions internal/controllers/machine/machine_controller.go
@@ -32,6 +32,7 @@ import (
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/klog/v2"
+	"k8s.io/utils/ptr"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/builder"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -388,13 +389,18 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Clu
 		return ctrl.Result{}, err
 	}
 
-	// The DrainingSucceededCondition never exists before the node is drained for the first time,
-	// so its transition time can be used to record the first time draining.
-	// This `if` condition prevents the transition time to be changed more than once.
+	// The DrainingSucceededCondition never exists before the node is drained for the first time.
 	if conditions.Get(m, clusterv1.DrainingSucceededCondition) == nil {
 		conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion")
 	}
 
+	if m.Status.Deletion == nil {
+		m.Status.Deletion = &clusterv1.MachineDeletionStatus{}
+	}
+	if m.Status.Deletion.NodeDrainStartTime == nil {
+		m.Status.Deletion.NodeDrainStartTime = ptr.To(metav1.Now())
+	}
+
 	if err := patchMachine(ctx, patchHelper, m); err != nil {
 		return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
 	}
@@ -418,13 +424,18 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Clu
 	// volumes are detached before proceeding to delete the Node.
 	// In case the node is unreachable, the detachment is skipped.
 	if r.isNodeVolumeDetachingAllowed(m) {
-		// The VolumeDetachSucceededCondition never exists before we wait for volume detachment for the first time,
-		// so its transition time can be used to record the first time we wait for volume detachment.
-		// This `if` condition prevents the transition time to be changed more than once.
+		// The VolumeDetachSucceededCondition never exists before we wait for volume detachment for the first time.
 		if conditions.Get(m, clusterv1.VolumeDetachSucceededCondition) == nil {
 			conditions.MarkFalse(m, clusterv1.VolumeDetachSucceededCondition, clusterv1.WaitingForVolumeDetachReason, clusterv1.ConditionSeverityInfo, "Waiting for node volumes to be detached")
 		}
 
+		if m.Status.Deletion == nil {
+			m.Status.Deletion = &clusterv1.MachineDeletionStatus{}
+		}
+		if m.Status.Deletion.WaitForNodeVolumeDetachStartTime == nil {
+			m.Status.Deletion.WaitForNodeVolumeDetachStartTime = ptr.To(metav1.Now())
+		}
+
 		if ok, err := r.shouldWaitForNodeVolumes(ctx, cluster, m.Status.NodeRef.Name); ok || err != nil {
 			if err != nil {
 				r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedWaitForVolumeDetach", "error waiting for node volumes detaching, Machine's node %q: %v", m.Status.NodeRef.Name, err)
@@ -540,38 +551,36 @@ func (r *Reconciler) isNodeVolumeDetachingAllowed(m *clusterv1.Machine) bool {
 
 func (r *Reconciler) nodeDrainTimeoutExceeded(machine *clusterv1.Machine) bool {
 	// if the NodeDrainTimeout type is not set by user
-	if machine.Spec.NodeDrainTimeout == nil || machine.Spec.NodeDrainTimeout.Seconds() <= 0 {
+	if machine.Status.Deletion == nil || machine.Spec.NodeDrainTimeout == nil || machine.Spec.NodeDrainTimeout.Seconds() <= 0 {
 		return false
 	}
 
-	// if the draining succeeded condition does not exist
-	if conditions.Get(machine, clusterv1.DrainingSucceededCondition) == nil {
+	// if the NodeDrainStartTime does not exist
+	if machine.Status.Deletion.NodeDrainStartTime == nil {
 		return false
 	}
 
 	now := time.Now()
-	firstTimeDrain := conditions.GetLastTransitionTime(machine, clusterv1.DrainingSucceededCondition)
-	diff := now.Sub(firstTimeDrain.Time)
+	diff := now.Sub(machine.Status.Deletion.NodeDrainStartTime.Time)
 	return diff.Seconds() >= machine.Spec.NodeDrainTimeout.Seconds()
 }
 
 // nodeVolumeDetachTimeoutExceeded returns False if either NodeVolumeDetachTimeout is set to nil or <=0 OR
-// VolumeDetachSucceededCondition is not set on the Machine. Otherwise returns true if the timeout is expired
-// since the last transition time of VolumeDetachSucceededCondition.
+// WaitForNodeVolumeDetachStartTime is not set on the Machine. Otherwise returns true if the timeout is expired
+// since the WaitForNodeVolumeDetachStartTime.
 func (r *Reconciler) nodeVolumeDetachTimeoutExceeded(machine *clusterv1.Machine) bool {
 	// if the NodeVolumeDetachTimeout type is not set by user
-	if machine.Spec.NodeVolumeDetachTimeout == nil || machine.Spec.NodeVolumeDetachTimeout.Seconds() <= 0 {
+	if machine.Status.Deletion == nil || machine.Spec.NodeVolumeDetachTimeout == nil || machine.Spec.NodeVolumeDetachTimeout.Seconds() <= 0 {
 		return false
 	}
 
-	// if the volume detaching succeeded condition does not exist
-	if conditions.Get(machine, clusterv1.VolumeDetachSucceededCondition) == nil {
+	// if the WaitForNodeVolumeDetachStartTime does not exist
+	if machine.Status.Deletion.WaitForNodeVolumeDetachStartTime == nil {
 		return false
 	}
 
 	now := time.Now()
-	firstTimeDetach := conditions.GetLastTransitionTime(machine, clusterv1.VolumeDetachSucceededCondition)
-	diff := now.Sub(firstTimeDetach.Time)
+	diff := now.Sub(machine.Status.Deletion.WaitForNodeVolumeDetachStartTime.Time)
 	return diff.Seconds() >= machine.Spec.NodeVolumeDetachTimeout.Seconds()
 }

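
Net effect of the helper rewrites above: the timeout clock now starts at an explicitly recorded timestamp rather than at a condition's last transition time, and a nil Status.Deletion simply means the clock never started. A standalone sketch of the same check (drainTimedOut is a hypothetical name, not from the commit), runnable against the v1beta1 types:

package main

import (
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// drainTimedOut mirrors the updated nodeDrainTimeoutExceeded logic:
// a missing Deletion block, a missing start time, or an unset/zero
// timeout all mean "not exceeded".
func drainTimedOut(m *clusterv1.Machine) bool {
	if m.Status.Deletion == nil || m.Spec.NodeDrainTimeout == nil || m.Spec.NodeDrainTimeout.Seconds() <= 0 {
		return false
	}
	if m.Status.Deletion.NodeDrainStartTime == nil {
		return false
	}
	return time.Since(m.Status.Deletion.NodeDrainStartTime.Time).Seconds() >= m.Spec.NodeDrainTimeout.Seconds()
}

func main() {
	m := &clusterv1.Machine{
		Spec: clusterv1.MachineSpec{NodeDrainTimeout: &metav1.Duration{Duration: 60 * time.Second}},
		Status: clusterv1.MachineStatus{
			Deletion: &clusterv1.MachineDeletionStatus{
				NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-70 * time.Second)},
			},
		},
	}
	fmt.Println(drainTimedOut(m)) // true: drain started 70s ago, timeout is 60s
}
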
48 changes: 12 additions & 36 deletions internal/controllers/machine/machine_controller_test.go
@@ -1386,12 +1386,8 @@ func TestIsNodeDrainedAllowed(t *testing.T) {
 			},
 
 			Status: clusterv1.MachineStatus{
-				Conditions: clusterv1.Conditions{
-					{
-						Type:               clusterv1.DrainingSucceededCondition,
-						Status:             corev1.ConditionFalse,
-						LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 70)).UTC()},
-					},
+				Deletion: &clusterv1.MachineDeletionStatus{
+					NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 70)).UTC()},
 				},
 			},
 		},
@@ -1412,12 +1408,8 @@ func TestIsNodeDrainedAllowed(t *testing.T) {
 				NodeDrainTimeout: &metav1.Duration{Duration: time.Second * 60},
 			},
 			Status: clusterv1.MachineStatus{
-				Conditions: clusterv1.Conditions{
-					{
-						Type:               clusterv1.DrainingSucceededCondition,
-						Status:             corev1.ConditionFalse,
-						LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
-					},
+				Deletion: &clusterv1.MachineDeletionStatus{
+					NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
 				},
 			},
 		},
@@ -1437,12 +1429,8 @@ func TestIsNodeDrainedAllowed(t *testing.T) {
 				Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
 			},
 			Status: clusterv1.MachineStatus{
-				Conditions: clusterv1.Conditions{
-					{
-						Type:               clusterv1.DrainingSucceededCondition,
-						Status:             corev1.ConditionFalse,
-						LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
-					},
+				Deletion: &clusterv1.MachineDeletionStatus{
+					NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
 				},
 			},
 		},
@@ -1896,12 +1884,8 @@ func TestIsNodeVolumeDetachingAllowed(t *testing.T) {
 			},
 
 			Status: clusterv1.MachineStatus{
-				Conditions: clusterv1.Conditions{
-					{
-						Type:               clusterv1.VolumeDetachSucceededCondition,
-						Status:             corev1.ConditionFalse,
-						LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 60)).UTC()},
-					},
+				Deletion: &clusterv1.MachineDeletionStatus{
+					WaitForNodeVolumeDetachStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 60)).UTC()},
 				},
 			},
 		},
@@ -1922,12 +1906,8 @@ func TestIsNodeVolumeDetachingAllowed(t *testing.T) {
 				NodeVolumeDetachTimeout: &metav1.Duration{Duration: time.Second * 60},
 			},
 			Status: clusterv1.MachineStatus{
-				Conditions: clusterv1.Conditions{
-					{
-						Type:               clusterv1.VolumeDetachSucceededCondition,
-						Status:             corev1.ConditionFalse,
-						LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
-					},
+				Deletion: &clusterv1.MachineDeletionStatus{
+					WaitForNodeVolumeDetachStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
 				},
 			},
 		},
@@ -1947,12 +1927,8 @@ func TestIsNodeVolumeDetachingAllowed(t *testing.T) {
 				Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
 			},
 			Status: clusterv1.MachineStatus{
-				Conditions: clusterv1.Conditions{
-					{
-						Type:               clusterv1.VolumeDetachSucceededCondition,
-						Status:             corev1.ConditionFalse,
-						LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
-					},
+				Deletion: &clusterv1.MachineDeletionStatus{
+					WaitForNodeVolumeDetachStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
 				},
 			},
 		},
