diff --git a/pkg/controller/deployment_inplace.go b/pkg/controller/deployment_inplace.go index 12188844a..914f707df 100644 --- a/pkg/controller/deployment_inplace.go +++ b/pkg/controller/deployment_inplace.go @@ -8,6 +8,7 @@ import ( "context" "fmt" "maps" + "slices" "sort" "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" @@ -69,7 +70,7 @@ func (dc *controller) rolloutInPlace(ctx context.Context, d *v1alpha1.MachineDep oldMachineSets, &v1.Taint{ Key: PreferNoScheduleKey, Value: "True", - Effect: "PreferNoSchedule", + Effect: v1.TaintEffectPreferNoSchedule, }, ) if err != nil { @@ -193,6 +194,18 @@ func (dc *controller) syncMachineSets(ctx context.Context, oldMachineSets []*v1a // uncordon the node since the inplace update is successful. node.Spec.Unschedulable = false + // remove the PreferNoSchedule taint if it exists which was added during the inplace update. + node.Spec.Taints = slices.DeleteFunc(node.Spec.Taints, func(t v1.Taint) bool { + return t.Key == PreferNoScheduleKey && t.Value == "True" && t.Effect == v1.TaintEffectPreferNoSchedule + }) + + // add the critical components not ready taint to the node. This is to ensure that + // workload pods are not scheduled on the node until the critical components pods are ready. + node.Spec.Taints = append(node.Spec.Taints, v1.Taint{ + Key: machineutils.TaintNodeCriticalComponentsNotReady, + Effect: v1.TaintEffectNoSchedule, + }) + _, err = dc.targetCoreClient.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}) if err != nil { return fmt.Errorf("failed to remove inplace labels/annotations and uncordon node %s: %w", node.Name, err) diff --git a/pkg/util/provider/machinecontroller/controller.go b/pkg/util/provider/machinecontroller/controller.go index ab63d84b4..2727ecb4c 100644 --- a/pkg/util/provider/machinecontroller/controller.go +++ b/pkg/util/provider/machinecontroller/controller.go @@ -44,7 +44,7 @@ import ( ) const ( - // MCMFinalizerName is the finalizer used to tag dependecies before deletion + // MCMFinalizerName is the finalizer used to tag dependencies before deletion // of the object. This finalizer is carried over from the MCM MCMFinalizerName = "machine.sapcloud.io/machine-controller-manager" // MCFinalizerName is the finalizer created for the external diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 29125113a..3f554e03d 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -954,8 +954,8 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph // if the label update successful or failed, then skip the timeout check if node != nil && metav1.HasLabel(node.ObjectMeta, v1alpha1.LabelKeyNodeUpdateResult) { if node.Labels[v1alpha1.LabelKeyNodeUpdateResult] == v1alpha1.LabelValueNodeUpdateSuccessful && clone.Status.CurrentStatus.Phase != v1alpha1.MachineInPlaceUpdateSuccessful { - description = fmt.Sprintf("Machine %s successfully updated dependecies", machine.Name) - klog.V(2).Infof("%s with backing node %q and providerID %q sucessfully update the dependecies", description, getNodeName(machine), getProviderID(machine)) + description = fmt.Sprintf("Machine %s successfully updated dependencies", machine.Name) + klog.V(2).Infof("%s with backing node %q and providerID %q sucessfully update the dependencies", description, getNodeName(machine), getProviderID(machine)) clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ Phase: v1alpha1.MachineInPlaceUpdateSuccessful, LastUpdateTime: metav1.Now(), @@ -968,8 +968,8 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph } cloneDirty = true } else if node.Labels[v1alpha1.LabelKeyNodeUpdateResult] == v1alpha1.LabelValueNodeUpdateFailed && clone.Status.CurrentStatus.Phase != v1alpha1.MachineInPlaceUpdateFailed { - description = fmt.Sprintf("Machine %s failed to update dependecies: %s", machine.Name, node.Annotations[v1alpha1.AnnotationKeyMachineUpdateFailedReason]) - klog.V(2).Infof("%s with backing node %q and providerID %q failed to update dependecies", description, getNodeName(machine), getProviderID(machine)) + description = fmt.Sprintf("Machine %s failed to update dependencies: %s", machine.Name, node.Annotations[v1alpha1.AnnotationKeyMachineUpdateFailedReason]) + klog.V(2).Infof("%s with backing node %q and providerID %q failed to update dependencies", description, getNodeName(machine), getProviderID(machine)) clone.Status.CurrentStatus = v1alpha1.CurrentStatus{ Phase: v1alpha1.MachineInPlaceUpdateFailed, LastUpdateTime: metav1.Now(),