From e0369bcfa4f5ec486a31ee234fa1aac04e89b2c0 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 12:48:08 -0500 Subject: [PATCH 01/18] cmd/helpers: Add signal handler helper Both machine-config-controller and machine-config-operator have the need of a signal handler to prompt them to gracefully shut down. This adds one to our cmd helpers to avoid code duplication. --- cmd/common/helpers.go | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/cmd/common/helpers.go b/cmd/common/helpers.go index c129e6a721..edc902714a 100644 --- a/cmd/common/helpers.go +++ b/cmd/common/helpers.go @@ -2,6 +2,8 @@ package common import ( "os" + "os/signal" + "syscall" "github.com/golang/glog" "github.com/openshift/machine-config-operator/internal/clients" @@ -50,7 +52,7 @@ func CreateResourceLock(cb *clients.Builder, componentNamespace, componentName s } // GetLeaderElectionConfig returns leader election configs defaults based on the cluster topology -func GetLeaderElectionConfig(restcfg *rest.Config) configv1.LeaderElection { +func GetLeaderElectionConfig(ctx context.Context, restcfg *rest.Config) configv1.LeaderElection { // Defaults follow conventions // https://github.com/openshift/enhancements/blob/master/CONVENTIONS.md#high-availability @@ -59,7 +61,7 @@ func GetLeaderElectionConfig(restcfg *rest.Config) configv1.LeaderElection { "", "", ) - if infra, err := clusterstatus.GetClusterInfraStatus(context.TODO(), restcfg); err == nil && infra != nil { + if infra, err := clusterstatus.GetClusterInfraStatus(ctx, restcfg); err == nil && infra != nil { if infra.ControlPlaneTopology == configv1.SingleReplicaTopologyMode { return leaderelection.LeaderElectionSNOConfig(defaultLeaderElection) } @@ -69,3 +71,23 @@ func GetLeaderElectionConfig(restcfg *rest.Config) configv1.LeaderElection { return defaultLeaderElection } + +// SignalHandler 
catches SIGINT/SIGTERM signals and makes sure the passed context gets cancelled when those signals happen. This allows us to use a +// context to shut down our operations cleanly when we are signalled to shutdown. +func SignalHandler(runCancel context.CancelFunc) { + + // make a signal handling channel for os signals + ch := make(chan os.Signal, 1) + // stop listening for signals when we leave this function + defer func() { signal.Stop(ch) }() + // catch SIGINT and SIGTERM + signal.Notify(ch, os.Interrupt, syscall.SIGTERM) + sig := <-ch + glog.Infof("Shutting down due to: %s", sig) + // if we're shutting down, cancel the context so everything else will stop + runCancel() + glog.Infof("Context cancelled") + sig = <-ch + glog.Fatalf("Received shutdown signal twice, exiting: %s", sig) + +} From 0480f42ad5a18d27d12a2554fa36e7a6379d35fd Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 12:49:57 -0500 Subject: [PATCH 02/18] internal/kubeutils: context-ify UpdateNodeRetry The UpdateNodeRetry function has a Patch call that needs to be cancellable. This adds a context to UpdateNodeRetry so that underling Patch API call call can be cancelled --- internal/kubeutils.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/kubeutils.go b/internal/kubeutils.go index 957a49fcf2..381a886e14 100644 --- a/internal/kubeutils.go +++ b/internal/kubeutils.go @@ -19,7 +19,7 @@ import ( // number of times. // f will be called each time since the node object will likely have changed if // a retry is necessary. 
-func UpdateNodeRetry(client corev1client.NodeInterface, lister corev1lister.NodeLister, nodeName string, f func(*corev1.Node)) (*corev1.Node, error) { +func UpdateNodeRetry(ctx context.Context, client corev1client.NodeInterface, lister corev1lister.NodeLister, nodeName string, f func(*corev1.Node)) (*corev1.Node, error) { var node *corev1.Node if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { n, err := lister.Get(nodeName) @@ -44,7 +44,7 @@ func UpdateNodeRetry(client corev1client.NodeInterface, lister corev1lister.Node return fmt.Errorf("failed to create patch for node %q: %w", nodeName, err) } - node, err = client.Patch(context.TODO(), nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) + node, err = client.Patch(ctx, nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) return err }); err != nil { // may be conflict if max retries were hit From cf50066a2614bb600ea8b103bc0339af81fc0a7f Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 12:54:28 -0500 Subject: [PATCH 03/18] lib/resourceapply: Add contexts to support cancel The resource application functions did not previously support context cancellation, but their underlying API calls did. This makes the resource apply functions support context cancellation all the way through, so we can properly cancel them in cases like when we need to shut down our controllers/operators. --- lib/resourceapply/apps.go | 16 ++++++++-------- lib/resourceapply/machineconfig.go | 24 ++++++++++++------------ lib/resourceapply/machineconfig_test.go | 3 ++- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/lib/resourceapply/apps.go b/lib/resourceapply/apps.go index c7ea9ccc58..7afc18cf23 100644 --- a/lib/resourceapply/apps.go +++ b/lib/resourceapply/apps.go @@ -12,10 +12,10 @@ import ( ) // ApplyDaemonSet applies the required daemonset to the cluster. 
-func ApplyDaemonSet(client appsclientv1.DaemonSetsGetter, required *appsv1.DaemonSet) (*appsv1.DaemonSet, bool, error) { - existing, err := client.DaemonSets(required.Namespace).Get(context.TODO(), required.Name, metav1.GetOptions{}) +func ApplyDaemonSet(ctx context.Context, client appsclientv1.DaemonSetsGetter, required *appsv1.DaemonSet) (*appsv1.DaemonSet, bool, error) { + existing, err := client.DaemonSets(required.Namespace).Get(ctx, required.Name, metav1.GetOptions{}) if apierrors.IsNotFound(err) { - actual, err := client.DaemonSets(required.Namespace).Create(context.TODO(), required, metav1.CreateOptions{}) + actual, err := client.DaemonSets(required.Namespace).Create(ctx, required, metav1.CreateOptions{}) return actual, true, err } if err != nil { @@ -28,15 +28,15 @@ func ApplyDaemonSet(client appsclientv1.DaemonSetsGetter, required *appsv1.Daemo return existing, false, nil } - actual, err := client.DaemonSets(required.Namespace).Update(context.TODO(), existing, metav1.UpdateOptions{}) + actual, err := client.DaemonSets(required.Namespace).Update(ctx, existing, metav1.UpdateOptions{}) return actual, true, err } // ApplyDeployment applies the required deployment to the cluster. 
-func ApplyDeployment(client appsclientv1.DeploymentsGetter, required *appsv1.Deployment) (*appsv1.Deployment, bool, error) { - existing, err := client.Deployments(required.Namespace).Get(context.TODO(), required.Name, metav1.GetOptions{}) +func ApplyDeployment(ctx context.Context, client appsclientv1.DeploymentsGetter, required *appsv1.Deployment) (*appsv1.Deployment, bool, error) { + existing, err := client.Deployments(required.Namespace).Get(ctx, required.Name, metav1.GetOptions{}) if apierrors.IsNotFound(err) { - actual, err := client.Deployments(required.Namespace).Create(context.TODO(), required, metav1.CreateOptions{}) + actual, err := client.Deployments(required.Namespace).Create(ctx, required, metav1.CreateOptions{}) return actual, true, err } if err != nil { @@ -49,6 +49,6 @@ func ApplyDeployment(client appsclientv1.DeploymentsGetter, required *appsv1.Dep return existing, false, nil } - actual, err := client.Deployments(required.Namespace).Update(context.TODO(), existing, metav1.UpdateOptions{}) + actual, err := client.Deployments(required.Namespace).Update(ctx, existing, metav1.UpdateOptions{}) return actual, true, err } diff --git a/lib/resourceapply/machineconfig.go b/lib/resourceapply/machineconfig.go index fba21d28ff..f71fddeaa7 100644 --- a/lib/resourceapply/machineconfig.go +++ b/lib/resourceapply/machineconfig.go @@ -12,10 +12,10 @@ import ( ) // ApplyMachineConfig applies the required machineconfig to the cluster. 
-func ApplyMachineConfig(client mcfgclientv1.MachineConfigsGetter, required *mcfgv1.MachineConfig) (*mcfgv1.MachineConfig, bool, error) { - existing, err := client.MachineConfigs().Get(context.TODO(), required.GetName(), metav1.GetOptions{}) +func ApplyMachineConfig(ctx context.Context, client mcfgclientv1.MachineConfigsGetter, required *mcfgv1.MachineConfig) (*mcfgv1.MachineConfig, bool, error) { + existing, err := client.MachineConfigs().Get(ctx, required.GetName(), metav1.GetOptions{}) if apierrors.IsNotFound(err) { - actual, err := client.MachineConfigs().Create(context.TODO(), required, metav1.CreateOptions{}) + actual, err := client.MachineConfigs().Create(ctx, required, metav1.CreateOptions{}) return actual, true, err } if err != nil { @@ -28,15 +28,15 @@ func ApplyMachineConfig(client mcfgclientv1.MachineConfigsGetter, required *mcfg return existing, false, nil } - actual, err := client.MachineConfigs().Update(context.TODO(), existing, metav1.UpdateOptions{}) + actual, err := client.MachineConfigs().Update(ctx, existing, metav1.UpdateOptions{}) return actual, true, err } // ApplyMachineConfigPool applies the required machineconfig to the cluster. 
-func ApplyMachineConfigPool(client mcfgclientv1.MachineConfigPoolsGetter, required *mcfgv1.MachineConfigPool) (*mcfgv1.MachineConfigPool, bool, error) { - existing, err := client.MachineConfigPools().Get(context.TODO(), required.GetName(), metav1.GetOptions{}) +func ApplyMachineConfigPool(ctx context.Context, client mcfgclientv1.MachineConfigPoolsGetter, required *mcfgv1.MachineConfigPool) (*mcfgv1.MachineConfigPool, bool, error) { + existing, err := client.MachineConfigPools().Get(ctx, required.GetName(), metav1.GetOptions{}) if apierrors.IsNotFound(err) { - actual, err := client.MachineConfigPools().Create(context.TODO(), required, metav1.CreateOptions{}) + actual, err := client.MachineConfigPools().Create(ctx, required, metav1.CreateOptions{}) return actual, true, err } if err != nil { @@ -49,15 +49,15 @@ func ApplyMachineConfigPool(client mcfgclientv1.MachineConfigPoolsGetter, requir return existing, false, nil } - actual, err := client.MachineConfigPools().Update(context.TODO(), existing, metav1.UpdateOptions{}) + actual, err := client.MachineConfigPools().Update(ctx, existing, metav1.UpdateOptions{}) return actual, true, err } // ApplyControllerConfig applies the required machineconfig to the cluster. 
-func ApplyControllerConfig(client mcfgclientv1.ControllerConfigsGetter, required *mcfgv1.ControllerConfig) (*mcfgv1.ControllerConfig, bool, error) { - existing, err := client.ControllerConfigs().Get(context.TODO(), required.GetName(), metav1.GetOptions{}) +func ApplyControllerConfig(ctx context.Context, client mcfgclientv1.ControllerConfigsGetter, required *mcfgv1.ControllerConfig) (*mcfgv1.ControllerConfig, bool, error) { + existing, err := client.ControllerConfigs().Get(ctx, required.GetName(), metav1.GetOptions{}) if apierrors.IsNotFound(err) { - actual, err := client.ControllerConfigs().Create(context.TODO(), required, metav1.CreateOptions{}) + actual, err := client.ControllerConfigs().Create(ctx, required, metav1.CreateOptions{}) return actual, true, err } if err != nil { @@ -70,6 +70,6 @@ func ApplyControllerConfig(client mcfgclientv1.ControllerConfigsGetter, required return existing, false, nil } - actual, err := client.ControllerConfigs().Update(context.TODO(), existing, metav1.UpdateOptions{}) + actual, err := client.ControllerConfigs().Update(ctx, existing, metav1.UpdateOptions{}) return actual, true, err } diff --git a/lib/resourceapply/machineconfig_test.go b/lib/resourceapply/machineconfig_test.go index c0a1906cd9..1a621bcc69 100644 --- a/lib/resourceapply/machineconfig_test.go +++ b/lib/resourceapply/machineconfig_test.go @@ -1,6 +1,7 @@ package resourceapply import ( + "context" "fmt" "testing" @@ -285,7 +286,7 @@ func TestApplyMachineConfig(t *testing.T) { for idx, test := range tests { t.Run(fmt.Sprintf("test#%d", idx), func(t *testing.T) { client := fake.NewSimpleClientset(test.existing...) 
- _, actualModified, err := ApplyMachineConfig(client.MachineconfigurationV1(), test.input) + _, actualModified, err := ApplyMachineConfig(context.TODO(), client.MachineconfigurationV1(), test.input) if err != nil { t.Fatal(err) } From e4101b0359b693f90ab0f6c617ae29f353913846 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 12:56:28 -0500 Subject: [PATCH 04/18] add Name() to controller interface definition When looping through controllers, like we do in our controller start functions in start.go, it is nice to know which controller is being started. Rather than change the familiar slice/append layout we've grown so used to (and keep the controllers in say, a map or something indexed by name, I figured it would be better to have each operator know it's own name so we can retrieve it. --- pkg/controller/common/controller.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/controller/common/controller.go b/pkg/controller/common/controller.go index a0064ccf18..4b2e549864 100644 --- a/pkg/controller/common/controller.go +++ b/pkg/controller/common/controller.go @@ -1,6 +1,9 @@ package common +import "context" + // Controller is the common interface all controllers implement type Controller interface { - Run(workers int, stopCh <-chan struct{}) + Run(ctx context.Context, workers int) + Name() string } From 16ac9983769864a55cb9164c4ced226d899e1ce4 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 12:59:40 -0500 Subject: [PATCH 05/18] Make GetManagedKey helper cancellable with context The GetManagedKey helper function does a bunch of gets/creates that need to be cancellable. This modifies GetManagedKey to receive a context argument so those gets/creates can be cancelled via that context. 
--- pkg/controller/common/helpers.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/controller/common/helpers.go b/pkg/controller/common/helpers.go index f2640dd815..9121c25cc6 100644 --- a/pkg/controller/common/helpers.go +++ b/pkg/controller/common/helpers.go @@ -673,16 +673,16 @@ func MachineConfigFromRawIgnConfig(role, name string, rawIgnCfg []byte) (*mcfgv1 } // GetManagedKey returns the managed key for sub-controllers, handling any migration needed -func GetManagedKey(pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interface, prefix, suffix, deprecatedKey string) (string, error) { +func GetManagedKey(ctx context.Context, pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interface, prefix, suffix, deprecatedKey string) (string, error) { managedKey := fmt.Sprintf("%s-%s-generated-%s", prefix, pool.Name, suffix) // if we don't have a client, we're installing brand new, and we don't need to adjust for backward compatibility if client == nil { return managedKey, nil } - if _, err := client.MachineconfigurationV1().MachineConfigs().Get(context.TODO(), managedKey, metav1.GetOptions{}); err == nil { + if _, err := client.MachineconfigurationV1().MachineConfigs().Get(ctx, managedKey, metav1.GetOptions{}); err == nil { return managedKey, nil } - old, err := client.MachineconfigurationV1().MachineConfigs().Get(context.TODO(), deprecatedKey, metav1.GetOptions{}) + old, err := client.MachineconfigurationV1().MachineConfigs().Get(ctx, deprecatedKey, metav1.GetOptions{}) if err != nil && !kerr.IsNotFound(err) { return "", fmt.Errorf("could not get MachineConfig %q: %w", deprecatedKey, err) } @@ -695,11 +695,11 @@ func GetManagedKey(pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interfac if err != nil { return "", err } - _, err = client.MachineconfigurationV1().MachineConfigs().Create(context.TODO(), mc, metav1.CreateOptions{}) + _, err = client.MachineconfigurationV1().MachineConfigs().Create(ctx, mc, metav1.CreateOptions{}) 
if err != nil { return "", err } - err = client.MachineconfigurationV1().MachineConfigs().Delete(context.TODO(), deprecatedKey, metav1.DeleteOptions{}) + err = client.MachineconfigurationV1().MachineConfigs().Delete(ctx, deprecatedKey, metav1.DeleteOptions{}) return managedKey, err } From 4b24c1565dfee7ed11b6c18c63ca0e61297192b1 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:02:16 -0500 Subject: [PATCH 06/18] Make metrics listener shutdown more clear Previously we did not shutdown the metric listener properly, so this didn't matter, but it would always report an error (even if the error was 'nil'). This adjusts it so it makes it clear in the logs what the status of the shutdown is, and it no longer emits an error on successful shutdown --- pkg/controller/common/metrics.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pkg/controller/common/metrics.go b/pkg/controller/common/metrics.go index c09b6ccba9..49caa38b75 100644 --- a/pkg/controller/common/metrics.go +++ b/pkg/controller/common/metrics.go @@ -39,7 +39,7 @@ func RegisterMCCMetrics() error { } // StartMetricsListener is metrics listener via http on localhost -func StartMetricsListener(addr string, stopCh <-chan struct{}) { +func StartMetricsListener(addr string, stopCh <-chan struct{}) error { if addr == "" { addr = DefaultBindAddress } @@ -48,7 +48,7 @@ func StartMetricsListener(addr string, stopCh <-chan struct{}) { if err := RegisterMCCMetrics(); err != nil { glog.Errorf("unable to register metrics: %v", err) // No sense in continuing starting the listener if this fails - return + return err } glog.Infof("Starting metrics listener on %s", addr) @@ -62,7 +62,14 @@ func StartMetricsListener(addr string, stopCh <-chan struct{}) { } }() <-stopCh - if err := s.Shutdown(context.Background()); err != http.ErrServerClosed { - glog.Errorf("error stopping metrics listener: %v", err) + if err := 
s.Shutdown(context.Background()); err != nil { + if err != http.ErrServerClosed { + glog.Errorf("error stopping metrics listener: %v", err) + return err + } + } else { + glog.Infof("Metrics listener successfully stopped") } + + return nil } From 89588ba62e2625654469a270e1f1dafdcd047a68 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:08:04 -0500 Subject: [PATCH 07/18] Context cancellation for bootstrap controller Not that we necessarily need it, but this adds contexts to all of the functions in the bootstrap controller that need to be cancelled to shut down properly. --- cmd/machine-config-controller/bootstrap.go | 3 ++- pkg/controller/bootstrap/bootstrap.go | 13 +++++++------ pkg/controller/bootstrap/bootstrap_test.go | 3 ++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/cmd/machine-config-controller/bootstrap.go b/cmd/machine-config-controller/bootstrap.go index 8d9384f320..1e1f1a9134 100644 --- a/cmd/machine-config-controller/bootstrap.go +++ b/cmd/machine-config-controller/bootstrap.go @@ -1,6 +1,7 @@ package main import ( + "context" "flag" "github.com/golang/glog" @@ -43,7 +44,7 @@ func runbootstrapCmd(cmd *cobra.Command, args []string) { glog.Fatalf("--dest-dir or --manifest-dir not set") } - if err := bootstrap.New(rootOpts.templates, bootstrapOpts.manifestsDir, bootstrapOpts.pullSecretFile).Run(bootstrapOpts.destinationDir); err != nil { + if err := bootstrap.New(rootOpts.templates, bootstrapOpts.manifestsDir, bootstrapOpts.pullSecretFile).Run(context.TODO(), bootstrapOpts.destinationDir); err != nil { glog.Fatalf("error running MCC[BOOTSTRAP]: %v", err) } } diff --git a/pkg/controller/bootstrap/bootstrap.go b/pkg/controller/bootstrap/bootstrap.go index 252d766895..8877c90c4c 100644 --- a/pkg/controller/bootstrap/bootstrap.go +++ b/pkg/controller/bootstrap/bootstrap.go @@ -2,6 +2,7 @@ package bootstrap import ( "bytes" + "context" "errors" "fmt" "io" 
@@ -49,7 +50,7 @@ func New(templatesDir, manifestDir, pullSecretFile string) *Bootstrap { // Run runs boostrap for Machine Config Controller // It writes all the assets to destDir // nolint:gocyclo -func (b *Bootstrap) Run(destDir string) error { +func (b *Bootstrap) Run(ctx context.Context, destDir string) error { infos, err := ioutil.ReadDir(b.manifestDir) if err != nil { return err @@ -146,7 +147,7 @@ func (b *Bootstrap) Run(destDir string) error { } configs = append(configs, iconfigs...) - rconfigs, err := containerruntimeconfig.RunImageBootstrap(b.templatesDir, cconfig, pools, icspRules, imgCfg) + rconfigs, err := containerruntimeconfig.RunImageBootstrap(ctx, b.templatesDir, cconfig, pools, icspRules, imgCfg) if err != nil { return err } @@ -154,14 +155,14 @@ func (b *Bootstrap) Run(destDir string) error { configs = append(configs, rconfigs...) if len(crconfigs) > 0 { - containerRuntimeConfigs, err := containerruntimeconfig.RunContainerRuntimeBootstrap(b.templatesDir, crconfigs, cconfig, pools) + containerRuntimeConfigs, err := containerruntimeconfig.RunContainerRuntimeBootstrap(ctx, b.templatesDir, crconfigs, cconfig, pools) if err != nil { return err } configs = append(configs, containerRuntimeConfigs...) } if featureGate != nil { - featureConfigs, err := kubeletconfig.RunFeatureGateBootstrap(b.templatesDir, featureGate, nodeConfig, cconfig, pools) + featureConfigs, err := kubeletconfig.RunFeatureGateBootstrap(ctx, b.templatesDir, featureGate, nodeConfig, cconfig, pools) if err != nil { return err } @@ -169,14 +170,14 @@ func (b *Bootstrap) Run(destDir string) error { } if nodeConfig != nil { - nodeConfigs, err := kubeletconfig.RunNodeConfigBootstrap(b.templatesDir, featureGate, cconfig, nodeConfig, pools) + nodeConfigs, err := kubeletconfig.RunNodeConfigBootstrap(ctx, b.templatesDir, featureGate, cconfig, nodeConfig, pools) if err != nil { return err } configs = append(configs, nodeConfigs...) 
} if len(kconfigs) > 0 { - kconfigs, err := kubeletconfig.RunKubeletBootstrap(b.templatesDir, kconfigs, cconfig, featureGate, nodeConfig, pools) + kconfigs, err := kubeletconfig.RunKubeletBootstrap(ctx, b.templatesDir, kconfigs, cconfig, featureGate, nodeConfig, pools) if err != nil { return err } diff --git a/pkg/controller/bootstrap/bootstrap_test.go b/pkg/controller/bootstrap/bootstrap_test.go index bd4553f4b2..63eba2b4e4 100644 --- a/pkg/controller/bootstrap/bootstrap_test.go +++ b/pkg/controller/bootstrap/bootstrap_test.go @@ -1,6 +1,7 @@ package bootstrap import ( + "context" "fmt" "io/ioutil" "os" @@ -165,7 +166,7 @@ func TestBootstrapRun(t *testing.T) { defer os.RemoveAll(destDir) bootstrap := New("../../../templates", "testdata/bootstrap", "testdata/bootstrap/machineconfigcontroller-pull-secret") - err = bootstrap.Run(destDir) + err = bootstrap.Run(context.TODO(), destDir) require.NoError(t, err) for _, poolName := range []string{"master", "worker"} { From 9bffaf906e216ef68a054d29e0599c4a1a97a0a7 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:10:14 -0500 Subject: [PATCH 08/18] Context cancellation for container runtime ctrl This adds context cancellation to the functions in the container runtime controller. This favors explicitly passing the context as the first parameter so it is apparent that the function is cancellable. This also adds a private context to the controller struct for cases where the function signature could not change due to interface constraints (like Informer event handlers), but that usage makes it less clear what is cancellable and what isn't, so I tried to avoid it unless absolutely necessary. 
--- .../container_runtime_config_bootstrap.go | 9 +- ...container_runtime_config_bootstrap_test.go | 7 +- .../container_runtime_config_controller.go | 139 ++++++++++-------- ...ontainer_runtime_config_controller_test.go | 48 +++--- .../container-runtime-config/helpers.go | 14 +- 5 files changed, 115 insertions(+), 102 deletions(-) diff --git a/pkg/controller/container-runtime-config/container_runtime_config_bootstrap.go b/pkg/controller/container-runtime-config/container_runtime_config_bootstrap.go index c2dbf8db1f..09bed40252 100644 --- a/pkg/controller/container-runtime-config/container_runtime_config_bootstrap.go +++ b/pkg/controller/container-runtime-config/container_runtime_config_bootstrap.go @@ -1,6 +1,7 @@ package containerruntimeconfig import ( + "context" "fmt" "github.com/golang/glog" @@ -12,7 +13,7 @@ import ( ) // RunContainerRuntimeBootstrap generates ignition configs at bootstrap -func RunContainerRuntimeBootstrap(templateDir string, crconfigs []*mcfgv1.ContainerRuntimeConfig, controllerConfig *mcfgv1.ControllerConfig, mcpPools []*mcfgv1.MachineConfigPool) ([]*mcfgv1.MachineConfig, error) { +func RunContainerRuntimeBootstrap(ctx context.Context, templateDir string, crconfigs []*mcfgv1.ContainerRuntimeConfig, controllerConfig *mcfgv1.ControllerConfig, mcpPools []*mcfgv1.MachineConfigPool) ([]*mcfgv1.MachineConfig, error) { var res []*mcfgv1.MachineConfig managedKeyExist := make(map[string]bool) for _, cfg := range crconfigs { @@ -57,7 +58,7 @@ func RunContainerRuntimeBootstrap(templateDir string, crconfigs []*mcfgv1.Contai if err != nil { return nil, fmt.Errorf("could not marshal container runtime ignition: %w", err) } - managedKey, err := generateBootstrapManagedKeyContainerConfig(pool, managedKeyExist) + managedKey, err := generateBootstrapManagedKeyContainerConfig(ctx, pool, managedKeyExist) if err != nil { return nil, fmt.Errorf("could not marshal container runtime ignition: %w", err) } @@ -90,11 +91,11 @@ func 
RunContainerRuntimeBootstrap(templateDir string, crconfigs []*mcfgv1.Contai // Note: Only one ContainerConfig manifest per pool is allowed for bootstrap mode for the following reason: // if you provide multiple per pool, they would overwrite each other and not merge, potentially confusing customers post install; // we can simplify the logic for the bootstrap generation and avoid some edge cases. -func generateBootstrapManagedKeyContainerConfig(pool *mcfgv1.MachineConfigPool, managedKeyExist map[string]bool) (string, error) { +func generateBootstrapManagedKeyContainerConfig(ctx context.Context, pool *mcfgv1.MachineConfigPool, managedKeyExist map[string]bool) (string, error) { if _, ok := managedKeyExist[pool.Name]; ok { return "", fmt.Errorf("Error found multiple ContainerConfig targeting MachineConfigPool %v. Please apply only one ContainerConfig manifest for each pool during installation", pool.Name) } - managedKey, err := ctrlcommon.GetManagedKey(pool, nil, "99", "containerruntime", "") + managedKey, err := ctrlcommon.GetManagedKey(ctx, pool, nil, "99", "containerruntime", "") if err != nil { return "", err } diff --git a/pkg/controller/container-runtime-config/container_runtime_config_bootstrap_test.go b/pkg/controller/container-runtime-config/container_runtime_config_bootstrap_test.go index 27029b6a9a..7d71408042 100644 --- a/pkg/controller/container-runtime-config/container_runtime_config_bootstrap_test.go +++ b/pkg/controller/container-runtime-config/container_runtime_config_bootstrap_test.go @@ -1,6 +1,7 @@ package containerruntimeconfig import ( + "context" "testing" apicfgv1 "github.com/openshift/api/config/v1" @@ -29,7 +30,7 @@ func TestAddKubeletCfgAfterBootstrapKubeletCfg(t *testing.T) { f.mccrLister = append(f.mccrLister, ctrcfg) f.objects = append(f.objects, ctrcfg) - mcs, err := RunContainerRuntimeBootstrap("../../../templates", []*mcfgv1.ContainerRuntimeConfig{ctrcfg}, cc, pools) + mcs, err := 
RunContainerRuntimeBootstrap(context.TODO(), "../../../templates", []*mcfgv1.ContainerRuntimeConfig{ctrcfg}, cc, pools) require.NoError(t, err) require.Len(t, mcs, 1) @@ -39,14 +40,14 @@ func TestAddKubeletCfgAfterBootstrapKubeletCfg(t *testing.T) { f.mccrLister = append(f.mccrLister, ctrcfg1) f.objects = append(f.objects, ctrcfg1) c := f.newController() - err = c.syncHandler(getKey(ctrcfg1, t)) + err = c.syncHandler(context.TODO(), getKey(ctrcfg1, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } // resync ctrcfg and check the managedKey c = f.newController() - err = c.syncHandler(getKey(ctrcfg, t)) + err = c.syncHandler(context.TODO(), getKey(ctrcfg, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } diff --git a/pkg/controller/container-runtime-config/container_runtime_config_controller.go b/pkg/controller/container-runtime-config/container_runtime_config_controller.go index 82616b1075..d717f2f101 100644 --- a/pkg/controller/container-runtime-config/container_runtime_config_controller.go +++ b/pkg/controller/container-runtime-config/container_runtime_config_controller.go @@ -69,14 +69,16 @@ var updateBackoff = wait.Backoff{ // Controller defines the container runtime config controller. 
type Controller struct { + name string + templatesDir string client mcfgclientset.Interface configClient configclientset.Interface eventRecorder record.EventRecorder - syncHandler func(mcp string) error - syncImgHandler func(mcp string) error + syncHandler func(ctx context.Context, mcp string) error + syncImgHandler func(ctx context.Context, mcp string) error enqueueContainerRuntimeConfig func(*mcfgv1.ContainerRuntimeConfig) ccLister mcfglistersv1.ControllerConfigLister @@ -99,6 +101,8 @@ type Controller struct { queue workqueue.RateLimitingInterface imgQueue workqueue.RateLimitingInterface + + ctx context.Context } // New returns a new container runtime config controller @@ -119,6 +123,7 @@ func New( eventBroadcaster.StartRecordingToSink(&coreclientsetv1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) ctrl := &Controller{ + name: "ContainerRuntimeController", templatesDir: templatesDir, client: mcfgClient, configClient: configClient, @@ -171,12 +176,13 @@ func New( } // Run executes the container runtime config controller. 
-func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) { +func (ctrl *Controller) Run(ctx context.Context, workers int) { defer utilruntime.HandleCrash() defer ctrl.queue.ShutDown() defer ctrl.imgQueue.ShutDown() + ctrl.ctx = ctx - if !cache.WaitForCacheSync(stopCh, ctrl.mcpListerSynced, ctrl.mccrListerSynced, ctrl.ccListerSynced, + if !cache.WaitForCacheSync(ctx.Done(), ctrl.mcpListerSynced, ctrl.mccrListerSynced, ctrl.ccListerSynced, ctrl.imgListerSynced, ctrl.icspListerSynced, ctrl.clusterVersionListerSynced) { return } @@ -185,13 +191,13 @@ func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) { defer glog.Info("Shutting down MachineConfigController-ContainerRuntimeConfigController") for i := 0; i < workers; i++ { - go wait.Until(ctrl.worker, time.Second, stopCh) + go wait.UntilWithContext(ctx, ctrl.worker, time.Second) } // Just need one worker for the image config - go wait.Until(ctrl.imgWorker, time.Second, stopCh) + go wait.UntilWithContext(ctx, ctrl.imgWorker, time.Second) - <-stopCh + <-ctx.Done() } func ctrConfigTriggerObjectChange(old, new *mcfgv1.ContainerRuntimeConfig) bool { @@ -258,23 +264,23 @@ func (ctrl *Controller) deleteContainerRuntimeConfig(obj interface{}) { return } } - if err := ctrl.cascadeDelete(cfg); err != nil { + if err := ctrl.cascadeDelete(ctrl.ctx, cfg); err != nil { utilruntime.HandleError(fmt.Errorf("couldn't delete object %#v: %w", cfg, err)) } else { glog.V(4).Infof("Deleted ContainerRuntimeConfig %s and restored default config", cfg.Name) } } -func (ctrl *Controller) cascadeDelete(cfg *mcfgv1.ContainerRuntimeConfig) error { +func (ctrl *Controller) cascadeDelete(ctx context.Context, cfg *mcfgv1.ContainerRuntimeConfig) error { if len(cfg.GetFinalizers()) == 0 { return nil } mcName := cfg.GetFinalizers()[0] - err := ctrl.client.MachineconfigurationV1().MachineConfigs().Delete(context.TODO(), mcName, metav1.DeleteOptions{}) + err := ctrl.client.MachineconfigurationV1().MachineConfigs().Delete(ctx, mcName, 
metav1.DeleteOptions{}) if err != nil && !errors.IsNotFound(err) { return err } - if err := ctrl.popFinalizerFromContainerRuntimeConfig(cfg); err != nil { + if err := ctrl.popFinalizerFromContainerRuntimeConfig(ctx, cfg); err != nil { return err } return nil @@ -300,37 +306,37 @@ func (ctrl *Controller) enqueueRateLimited(cfg *mcfgv1.ContainerRuntimeConfig) { // worker runs a worker thread that just dequeues items, processes them, and marks them done. // It enforces that the syncHandler is never invoked concurrently with the same key. -func (ctrl *Controller) worker() { - for ctrl.processNextWorkItem() { +func (ctrl *Controller) worker(ctx context.Context) { + for ctrl.processNextWorkItem(ctx) { } } -func (ctrl *Controller) imgWorker() { - for ctrl.processNextImgWorkItem() { +func (ctrl *Controller) imgWorker(ctx context.Context) { + for ctrl.processNextImgWorkItem(ctx) { } } -func (ctrl *Controller) processNextWorkItem() bool { +func (ctrl *Controller) processNextWorkItem(ctx context.Context) bool { key, quit := ctrl.queue.Get() if quit { return false } defer ctrl.queue.Done(key) - err := ctrl.syncHandler(key.(string)) + err := ctrl.syncHandler(ctx, key.(string)) ctrl.handleErr(err, key) return true } -func (ctrl *Controller) processNextImgWorkItem() bool { +func (ctrl *Controller) processNextImgWorkItem(ctx context.Context) bool { key, quit := ctrl.imgQueue.Get() if quit { return false } defer ctrl.imgQueue.Done(key) - err := ctrl.syncImgHandler(key.(string)) + err := ctrl.syncImgHandler(ctx, key.(string)) ctrl.handleImgErr(err, key) return true @@ -417,7 +423,7 @@ func generateOriginalContainerRuntimeConfigs(templateDir string, cc *mcfgv1.Cont return gmcStorageConfig, gmcRegistriesConfig, gmcPolicyJSON, nil } -func (ctrl *Controller) syncStatusOnly(cfg *mcfgv1.ContainerRuntimeConfig, err error, args ...interface{}) error { +func (ctrl *Controller) syncStatusOnly(ctx context.Context, cfg *mcfgv1.ContainerRuntimeConfig, err error, args ...interface{}) error { 
statusUpdateErr := retry.RetryOnConflict(updateBackoff, func() error { newcfg, getErr := ctrl.mccrLister.Get(cfg.Name) if getErr != nil { @@ -437,7 +443,7 @@ func (ctrl *Controller) syncStatusOnly(cfg *mcfgv1.ContainerRuntimeConfig, err e } else if newcfg.Status.Conditions[len(newcfg.Status.Conditions)-1].Message == newStatusCondition.Message { newcfg.Status.Conditions[len(newcfg.Status.Conditions)-1] = newStatusCondition } - _, updateErr := ctrl.client.MachineconfigurationV1().ContainerRuntimeConfigs().UpdateStatus(context.TODO(), newcfg, metav1.UpdateOptions{}) + _, updateErr := ctrl.client.MachineconfigurationV1().ContainerRuntimeConfigs().UpdateStatus(ctx, newcfg, metav1.UpdateOptions{}) return updateErr }) // If an error occurred in updating the status just log it @@ -449,7 +455,7 @@ func (ctrl *Controller) syncStatusOnly(cfg *mcfgv1.ContainerRuntimeConfig, err e } // addAnnotation adds the annotions for a ctrcfg object with the given annotationKey and annotationVal -func (ctrl *Controller) addAnnotation(cfg *mcfgv1.ContainerRuntimeConfig, annotationKey, annotationVal string) error { +func (ctrl *Controller) addAnnotation(ctx context.Context, cfg *mcfgv1.ContainerRuntimeConfig, annotationKey, annotationVal string) error { annotationUpdateErr := retry.RetryOnConflict(updateBackoff, func() error { newcfg, getErr := ctrl.mccrLister.Get(cfg.Name) if getErr != nil { @@ -458,7 +464,7 @@ func (ctrl *Controller) addAnnotation(cfg *mcfgv1.ContainerRuntimeConfig, annota newcfg.SetAnnotations(map[string]string{ annotationKey: annotationVal, }) - _, updateErr := ctrl.client.MachineconfigurationV1().ContainerRuntimeConfigs().Update(context.TODO(), newcfg, metav1.UpdateOptions{}) + _, updateErr := ctrl.client.MachineconfigurationV1().ContainerRuntimeConfigs().Update(ctx, newcfg, metav1.UpdateOptions{}) return updateErr }) if annotationUpdateErr != nil { @@ -470,7 +476,7 @@ func (ctrl *Controller) addAnnotation(cfg *mcfgv1.ContainerRuntimeConfig, annota // 
syncContainerRuntimeConfig will sync the ContainerRuntimeconfig with the given key. // This function is not meant to be invoked concurrently with the same key. // nolint: gocyclo -func (ctrl *Controller) syncContainerRuntimeConfig(key string) error { +func (ctrl *Controller) syncContainerRuntimeConfig(ctx context.Context, key string) error { startTime := time.Now() glog.V(4).Infof("Started syncing ContainerRuntimeconfig %q (%v)", key, startTime) defer func() { @@ -498,14 +504,14 @@ func (ctrl *Controller) syncContainerRuntimeConfig(key string) error { // Check for Deleted ContainerRuntimeConfig and optionally delete finalizers if cfg.DeletionTimestamp != nil { if len(cfg.GetFinalizers()) > 0 { - return ctrl.cascadeDelete(cfg) + return ctrl.cascadeDelete(ctx, cfg) } return nil } // Validate the ContainerRuntimeConfig CR if err := validateUserContainerRuntimeConfig(cfg); err != nil { - return ctrl.syncStatusOnly(cfg, err) + return ctrl.syncStatusOnly(ctx, cfg, err) } // Get ControllerConfig @@ -517,26 +523,26 @@ func (ctrl *Controller) syncContainerRuntimeConfig(key string) error { // Find all MachineConfigPools mcpPools, err := ctrl.getPoolsForContainerRuntimeConfig(cfg) if err != nil { - return ctrl.syncStatusOnly(cfg, err) + return ctrl.syncStatusOnly(ctx, cfg, err) } if len(mcpPools) == 0 { err := fmt.Errorf("containerRuntimeConfig %v does not match any MachineConfigPools", key) glog.V(2).Infof("%v", err) - return ctrl.syncStatusOnly(cfg, err) + return ctrl.syncStatusOnly(ctx, cfg, err) } for _, pool := range mcpPools { role := pool.Name // Get MachineConfig - managedKey, err := getManagedKeyCtrCfg(pool, ctrl.client, cfg) + managedKey, err := getManagedKeyCtrCfg(ctx, pool, ctrl.client, cfg) if err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not get ctrcfg key: %v", err) + return ctrl.syncStatusOnly(ctx, cfg, err, "could not get ctrcfg key: %v", err) } - mc, err := ctrl.client.MachineconfigurationV1().MachineConfigs().Get(context.TODO(), managedKey, 
metav1.GetOptions{}) + mc, err := ctrl.client.MachineconfigurationV1().MachineConfigs().Get(ctx, managedKey, metav1.GetOptions{}) isNotFound := errors.IsNotFound(err) if err != nil && !isNotFound { - return ctrl.syncStatusOnly(cfg, err, "could not find MachineConfig: %v", managedKey) + return ctrl.syncStatusOnly(ctx, cfg, err, "could not find MachineConfig: %v", managedKey) } // If we have seen this generation and the sync didn't fail, then skip if !isNotFound && cfg.Status.ObservedGeneration >= cfg.Generation && cfg.Status.Conditions[len(cfg.Status.Conditions)-1].Type == mcfgv1.ContainerRuntimeConfigSuccess { @@ -549,7 +555,7 @@ func (ctrl *Controller) syncContainerRuntimeConfig(key string) error { // Generate the original ContainerRuntimeConfig originalStorageIgn, _, _, err := generateOriginalContainerRuntimeConfigs(ctrl.templatesDir, controllerConfig, role) if err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not generate origin ContainerRuntime Configs: %v", err) + return ctrl.syncStatusOnly(ctx, cfg, err, "could not generate origin ContainerRuntime Configs: %v", err) } var configFileList []generatedConfigFile @@ -558,10 +564,10 @@ func (ctrl *Controller) syncContainerRuntimeConfig(key string) error { storageTOML, err := mergeConfigChanges(originalStorageIgn, cfg, updateStorageConfig) if err != nil { glog.V(2).Infoln(cfg, err, "error merging user changes to storage.conf: %v", err) - ctrl.syncStatusOnly(cfg, err) + ctrl.syncStatusOnly(ctx, cfg, err) } else { configFileList = append(configFileList, generatedConfigFile{filePath: storageConfigPath, data: storageTOML}) - ctrl.syncStatusOnly(cfg, nil) + ctrl.syncStatusOnly(ctx, cfg, nil) } } @@ -575,15 +581,15 @@ func (ctrl *Controller) syncContainerRuntimeConfig(key string) error { tempIgnCfg := ctrlcommon.NewIgnConfig() mc, err = ctrlcommon.MachineConfigFromIgnConfig(role, managedKey, tempIgnCfg) if err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not create MachineConfig from new Ignition 
config: %v", err) + return ctrl.syncStatusOnly(ctx, cfg, err, "could not create MachineConfig from new Ignition config: %v", err) } _, ok := cfg.GetAnnotations()[ctrlcommon.MCNameSuffixAnnotationKey] arr := strings.Split(managedKey, "-") // the first managed key value 99-poolname-generated-containerruntime does not have a suffix // set "" as suffix annotation to the containerruntime config object if _, err := strconv.Atoi(arr[len(arr)-1]); err != nil && !ok { - if err := ctrl.addAnnotation(cfg, ctrlcommon.MCNameSuffixAnnotationKey, ""); err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not update annotation for containerruntimeConfig") + if err := ctrl.addAnnotation(ctx, cfg, ctrlcommon.MCNameSuffixAnnotationKey, ""); err != nil { + return ctrl.syncStatusOnly(ctx, cfg, err, "could not update annotation for containerruntimeConfig") } } // If the MC name suffix annotation does not exist and the managed key value returned has a suffix, then add the MC name @@ -591,8 +597,8 @@ func (ctrl *Controller) syncContainerRuntimeConfig(key string) error { if len(arr) > 4 && !ok { _, err := strconv.Atoi(arr[len(arr)-1]) if err == nil { - if err := ctrl.addAnnotation(cfg, ctrlcommon.MCNameSuffixAnnotationKey, arr[len(arr)-1]); err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not update annotation for containerRuntimeConfig") + if err := ctrl.addAnnotation(ctx, cfg, ctrlcommon.MCNameSuffixAnnotationKey, arr[len(arr)-1]); err != nil { + return ctrl.syncStatusOnly(ctx, cfg, err, "could not update annotation for containerRuntimeConfig") } } } @@ -601,7 +607,7 @@ func (ctrl *Controller) syncContainerRuntimeConfig(key string) error { ctrRuntimeConfigIgn := createNewIgnition(configFileList) rawCtrRuntimeConfigIgn, err := json.Marshal(ctrRuntimeConfigIgn) if err != nil { - return ctrl.syncStatusOnly(cfg, err, "error marshalling container runtime config Ignition: %v", err) + return ctrl.syncStatusOnly(ctx, cfg, err, "error marshalling container runtime config Ignition: 
%v", err) } mc.Spec.Config.Raw = rawCtrRuntimeConfigIgn @@ -615,34 +621,34 @@ func (ctrl *Controller) syncContainerRuntimeConfig(key string) error { if err := retry.RetryOnConflict(updateBackoff, func() error { var err error if isNotFound { - _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Create(context.TODO(), mc, metav1.CreateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Create(ctx, mc, metav1.CreateOptions{}) } else { - _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Update(context.TODO(), mc, metav1.UpdateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Update(ctx, mc, metav1.UpdateOptions{}) } return err }); err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not Create/Update MachineConfig: %v", err) + return ctrl.syncStatusOnly(ctx, cfg, err, "could not Create/Update MachineConfig: %v", err) } // Add Finalizers to the ContainerRuntimeConfigs - if err := ctrl.addFinalizerToContainerRuntimeConfig(cfg, mc); err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not add finalizers to ContainerRuntimeConfig: %v", err) + if err := ctrl.addFinalizerToContainerRuntimeConfig(ctx, cfg, mc); err != nil { + return ctrl.syncStatusOnly(ctx, cfg, err, "could not add finalizers to ContainerRuntimeConfig: %v", err) } glog.Infof("Applied ContainerRuntimeConfig %v on MachineConfigPool %v", key, pool.Name) } - if err := ctrl.cleanUpDuplicatedMC(); err != nil { + if err := ctrl.cleanUpDuplicatedMC(ctx); err != nil { return err } - return ctrl.syncStatusOnly(cfg, nil) + return ctrl.syncStatusOnly(ctx, cfg, nil) } // cleanUpDuplicatedMC removes the MC of uncorrected version if format of its name contains 'generated-xxx'. // BZ 1955517: upgrade when there are more than one configs, these generated MC will be duplicated // by upgraded MC with number suffixed name (func getManagedKeyCtrCfg()) and fails the upgrade. 
-func (ctrl *Controller) cleanUpDuplicatedMC() error { +func (ctrl *Controller) cleanUpDuplicatedMC(ctx context.Context) error { generatedCtrCfg := "generated-containerruntime" // Get all machine configs - mcList, err := ctrl.client.MachineconfigurationV1().MachineConfigs().List(context.TODO(), metav1.ListOptions{}) + mcList, err := ctrl.client.MachineconfigurationV1().MachineConfigs().List(ctx, metav1.ListOptions{}) if err != nil { return fmt.Errorf("error listing containerruntime machine configs: %w", err) } @@ -652,7 +658,7 @@ func (ctrl *Controller) cleanUpDuplicatedMC() error { } // delete the containerruntime mc if its degraded if mc.Annotations[ctrlcommon.GeneratedByControllerVersionAnnotationKey] != version.Hash { - if err := ctrl.client.MachineconfigurationV1().MachineConfigs().Delete(context.TODO(), mc.Name, metav1.DeleteOptions{}); err != nil { + if err := ctrl.client.MachineconfigurationV1().MachineConfigs().Delete(ctx, mc.Name, metav1.DeleteOptions{}); err != nil { return fmt.Errorf("error deleting degraded containerruntime machine config %s: %w", mc.Name, err) } @@ -678,7 +684,7 @@ func mergeConfigChanges(origFile *ign3types.File, cfg *mcfgv1.ContainerRuntimeCo return cfgTOML, nil } -func (ctrl *Controller) syncImageConfig(key string) error { +func (ctrl *Controller) syncImageConfig(ctx context.Context, key string) error { startTime := time.Now() glog.V(4).Infof("Started syncing ImageConfig %q (%v)", key, startTime) defer func() { @@ -754,7 +760,7 @@ func (ctrl *Controller) syncImageConfig(key string) error { applied := true role := pool.Name // Get MachineConfig - managedKey, err := getManagedKeyReg(pool, ctrl.client) + managedKey, err := getManagedKeyReg(ctx, pool, ctrl.client) if err != nil { return err } @@ -769,7 +775,7 @@ func (ctrl *Controller) syncImageConfig(key string) error { if err != nil { return fmt.Errorf("could not encode registries Ignition config: %w", err) } - mc, err := 
ctrl.client.MachineconfigurationV1().MachineConfigs().Get(context.TODO(), managedKey, metav1.GetOptions{}) + mc, err := ctrl.client.MachineconfigurationV1().MachineConfigs().Get(ctx, managedKey, metav1.GetOptions{}) if err != nil && !errors.IsNotFound(err) { return fmt.Errorf("could not find MachineConfig: %w", err) } @@ -804,9 +810,9 @@ func (ctrl *Controller) syncImageConfig(key string) error { } // Create or Update, on conflict retry if isNotFound { - _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Create(context.TODO(), mc, metav1.CreateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Create(ctx, mc, metav1.CreateOptions{}) } else { - _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Update(context.TODO(), mc, metav1.UpdateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Update(ctx, mc, metav1.UpdateOptions{}) } return err @@ -875,7 +881,7 @@ func registriesConfigIgnition(templateDir string, controllerConfig *mcfgv1.Contr // RunImageBootstrap generates MachineConfig objects for mcpPools that would have been generated by syncImageConfig, // except that mcfgv1.Image is not available. 
-func RunImageBootstrap(templateDir string, controllerConfig *mcfgv1.ControllerConfig, mcpPools []*mcfgv1.MachineConfigPool, icspRules []*apioperatorsv1alpha1.ImageContentSourcePolicy, imgCfg *apicfgv1.Image) ([]*mcfgv1.MachineConfig, error) { +func RunImageBootstrap(ctx context.Context, templateDir string, controllerConfig *mcfgv1.ControllerConfig, mcpPools []*mcfgv1.MachineConfigPool, icspRules []*apioperatorsv1alpha1.ImageContentSourcePolicy, imgCfg *apicfgv1.Image) ([]*mcfgv1.MachineConfig, error) { var ( insecureRegs, registriesBlocked, policyBlocked, allowedRegs, searchRegs []string err error @@ -897,7 +903,7 @@ func RunImageBootstrap(templateDir string, controllerConfig *mcfgv1.ControllerCo var res []*mcfgv1.MachineConfig for _, pool := range mcpPools { role := pool.Name - managedKey, err := getManagedKeyReg(pool, nil) + managedKey, err := getManagedKeyReg(ctx, pool, nil) if err != nil { return nil, err } @@ -924,7 +930,7 @@ func RunImageBootstrap(templateDir string, controllerConfig *mcfgv1.ControllerCo return res, nil } -func (ctrl *Controller) popFinalizerFromContainerRuntimeConfig(ctrCfg *mcfgv1.ContainerRuntimeConfig) error { +func (ctrl *Controller) popFinalizerFromContainerRuntimeConfig(ctx context.Context, ctrCfg *mcfgv1.ContainerRuntimeConfig) error { return retry.RetryOnConflict(updateBackoff, func() error { newcfg, err := ctrl.mccrLister.Get(ctrCfg.Name) if errors.IsNotFound(err) { @@ -951,16 +957,16 @@ func (ctrl *Controller) popFinalizerFromContainerRuntimeConfig(ctrCfg *mcfgv1.Co if err != nil { return err } - return ctrl.patchContainerRuntimeConfigs(ctrCfg.Name, patch) + return ctrl.patchContainerRuntimeConfigs(ctx, ctrCfg.Name, patch) }) } -func (ctrl *Controller) patchContainerRuntimeConfigs(name string, patch []byte) error { - _, err := ctrl.client.MachineconfigurationV1().ContainerRuntimeConfigs().Patch(context.TODO(), name, types.MergePatchType, patch, metav1.PatchOptions{}) +func (ctrl *Controller) patchContainerRuntimeConfigs(ctx 
context.Context, name string, patch []byte) error { + _, err := ctrl.client.MachineconfigurationV1().ContainerRuntimeConfigs().Patch(ctx, name, types.MergePatchType, patch, metav1.PatchOptions{}) return err } -func (ctrl *Controller) addFinalizerToContainerRuntimeConfig(ctrCfg *mcfgv1.ContainerRuntimeConfig, mc *mcfgv1.MachineConfig) error { +func (ctrl *Controller) addFinalizerToContainerRuntimeConfig(ctx context.Context, ctrCfg *mcfgv1.ContainerRuntimeConfig, mc *mcfgv1.MachineConfig) error { return retry.RetryOnConflict(updateBackoff, func() error { newcfg, err := ctrl.mccrLister.Get(ctrCfg.Name) if errors.IsNotFound(err) { @@ -993,7 +999,7 @@ func (ctrl *Controller) addFinalizerToContainerRuntimeConfig(ctrCfg *mcfgv1.Cont if err != nil { return err } - return ctrl.patchContainerRuntimeConfigs(ctrCfg.Name, patch) + return ctrl.patchContainerRuntimeConfigs(ctx, ctrCfg.Name, patch) }) } @@ -1023,3 +1029,8 @@ func (ctrl *Controller) getPoolsForContainerRuntimeConfig(config *mcfgv1.Contain return pools, nil } + +// Name() returns the name of this controller +func (ctrl *Controller) Name() string { + return ctrl.name +} diff --git a/pkg/controller/container-runtime-config/container_runtime_config_controller_test.go b/pkg/controller/container-runtime-config/container_runtime_config_controller_test.go index 313bde56cf..605db61f4c 100644 --- a/pkg/controller/container-runtime-config/container_runtime_config_controller_test.go +++ b/pkg/controller/container-runtime-config/container_runtime_config_controller_test.go @@ -242,14 +242,14 @@ func (f *fixture) runExpectError(mcpname string) { func (f *fixture) runController(mcpname string, expectError bool) { c := f.newController() - err := c.syncImgHandler(mcpname) + err := c.syncImgHandler(context.TODO(), mcpname) if !expectError && err != nil { f.t.Errorf("error syncing image config: %v", err) } else if expectError && err == nil { f.t.Error("expected error syncing image config, got nil") } - err = c.syncHandler(mcpname) + 
err = c.syncHandler(context.TODO(), mcpname) if !expectError && err != nil { f.t.Errorf("error syncing containerruntimeconfigs: %v", err) } else if expectError && err == nil { @@ -444,7 +444,7 @@ func TestContainerRuntimeConfigCreate(t *testing.T) { mcp := helpers.NewMachineConfigPool("master", nil, helpers.MasterSelector, "v0") mcp2 := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") ctrcfg1 := newContainerRuntimeConfig("set-log-level", &mcfgv1.ContainerRuntimeConfiguration{LogLevel: "debug", LogSizeMax: resource.MustParse("9k"), OverlaySize: resource.MustParse("3G")}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, "pools.operator.machineconfiguration.openshift.io/master", "")) - ctrCfgKey, _ := getManagedKeyCtrCfg(mcp, f.client, ctrcfg1) + ctrCfgKey, _ := getManagedKeyCtrCfg(context.TODO(), mcp, f.client, ctrcfg1) mcs1 := helpers.NewMachineConfig(getManagedKeyCtrCfgDeprecated(mcp), map[string]string{"node-role": "master"}, "dummy://", []ign3types.File{{}}) mcs2 := mcs1.DeepCopy() mcs2.Name = ctrCfgKey @@ -481,7 +481,7 @@ func TestContainerRuntimeConfigUpdate(t *testing.T) { mcp := helpers.NewMachineConfigPool("master", nil, helpers.MasterSelector, "v0") mcp2 := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") ctrcfg1 := newContainerRuntimeConfig("set-log-level", &mcfgv1.ContainerRuntimeConfiguration{LogLevel: "debug", LogSizeMax: resource.MustParse("9k"), OverlaySize: resource.MustParse("3G")}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, "pools.operator.machineconfiguration.openshift.io/master", "")) - keyCtrCfg, _ := getManagedKeyCtrCfg(mcp, f.client, ctrcfg1) + keyCtrCfg, _ := getManagedKeyCtrCfg(context.TODO(), mcp, f.client, ctrcfg1) mcs := helpers.NewMachineConfig(getManagedKeyCtrCfgDeprecated(mcp), map[string]string{"node-role": "master"}, "dummy://", []ign3types.File{{}}) mcsUpdate := mcs.DeepCopy() mcsUpdate.Name = keyCtrCfg @@ -504,7 +504,7 @@ func TestContainerRuntimeConfigUpdate(t *testing.T) 
{ c := f.newController() stopCh := make(chan struct{}) - err := c.syncHandler(getKey(ctrcfg1, t)) + err := c.syncHandler(context.TODO(), getKey(ctrcfg1, t)) if err != nil { t.Errorf("syncHandler returned %v", err) } @@ -531,7 +531,7 @@ func TestContainerRuntimeConfigUpdate(t *testing.T) { glog.Info("Applying update") // Apply update - err = c.syncHandler(getKey(ctrcfgUpdate, t)) + err = c.syncHandler(context.TODO(), getKey(ctrcfgUpdate, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } @@ -562,8 +562,8 @@ func TestImageConfigCreate(t *testing.T) { mcp2 := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") imgcfg1 := newImageConfig("cluster", &apicfgv1.RegistrySources{InsecureRegistries: []string{"blah.io"}, AllowedRegistries: []string{"allow.io"}, ContainerRuntimeSearchRegistries: []string{"search-reg.io"}}) cvcfg1 := newClusterVersionConfig("version", "test.io/myuser/myimage:test") - keyReg1, _ := getManagedKeyReg(mcp, nil) - keyReg2, _ := getManagedKeyReg(mcp2, nil) + keyReg1, _ := getManagedKeyReg(context.TODO(), mcp, nil) + keyReg2, _ := getManagedKeyReg(context.TODO(), mcp2, nil) mcs1 := helpers.NewMachineConfig(keyReg1, map[string]string{"node-role": "master"}, "dummy://", []ign3types.File{{}}) mcs2 := helpers.NewMachineConfig(keyReg2, map[string]string{"node-role": "worker"}, "dummy://", []ign3types.File{{}}) @@ -604,8 +604,8 @@ func TestImageConfigUpdate(t *testing.T) { mcp2 := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") imgcfg1 := newImageConfig("cluster", &apicfgv1.RegistrySources{InsecureRegistries: []string{"blah.io"}, AllowedRegistries: []string{"allow.io"}, ContainerRuntimeSearchRegistries: []string{"search-reg.io"}}) cvcfg1 := newClusterVersionConfig("version", "test.io/myuser/myimage:test") - keyReg1, _ := getManagedKeyReg(mcp, nil) - keyReg2, _ := getManagedKeyReg(mcp2, nil) + keyReg1, _ := getManagedKeyReg(context.TODO(), mcp, nil) + keyReg2, _ := getManagedKeyReg(context.TODO(), 
mcp2, nil) mcs1 := helpers.NewMachineConfig(getManagedKeyRegDeprecated(mcp), map[string]string{"node-role": "master"}, "dummy://", []ign3types.File{{}}) mcs2 := helpers.NewMachineConfig(getManagedKeyRegDeprecated(mcp2), map[string]string{"node-role": "worker"}, "dummy://", []ign3types.File{{}}) mcs1Update := mcs1.DeepCopy() @@ -632,7 +632,7 @@ func TestImageConfigUpdate(t *testing.T) { c := f.newController() stopCh := make(chan struct{}) - err := c.syncImgHandler("cluster") + err := c.syncImgHandler(context.TODO(), "cluster") if err != nil { t.Errorf("syncImgHandler returned %v", err) } @@ -665,7 +665,7 @@ func TestImageConfigUpdate(t *testing.T) { glog.Info("Applying update") // Apply update - err = c.syncImgHandler("") + err = c.syncImgHandler(context.TODO(), "") if err != nil { t.Errorf("syncImgHandler returned: %v", err) } @@ -700,8 +700,8 @@ func TestICSPUpdate(t *testing.T) { mcp2 := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") imgcfg1 := newImageConfig("cluster", &apicfgv1.RegistrySources{InsecureRegistries: []string{"blah.io"}}) cvcfg1 := newClusterVersionConfig("version", "test.io/myuser/myimage:test") - keyReg1, _ := getManagedKeyReg(mcp, nil) - keyReg2, _ := getManagedKeyReg(mcp2, nil) + keyReg1, _ := getManagedKeyReg(context.TODO(), mcp, nil) + keyReg2, _ := getManagedKeyReg(context.TODO(), mcp2, nil) mcs1 := helpers.NewMachineConfig(getManagedKeyRegDeprecated(mcp), map[string]string{"node-role": "master"}, "dummy://", []ign3types.File{{}}) mcs2 := helpers.NewMachineConfig(getManagedKeyRegDeprecated(mcp2), map[string]string{"node-role": "worker"}, "dummy://", []ign3types.File{{}}) icsp := newICSP("built-in", []apioperatorsv1alpha1.RepositoryDigestMirrors{ @@ -733,7 +733,7 @@ func TestICSPUpdate(t *testing.T) { c := f.newController() stopCh := make(chan struct{}) - err := c.syncImgHandler("cluster") + err := c.syncImgHandler(context.TODO(), "cluster") if err != nil { t.Errorf("syncImgHandler returned %v", err) } @@ -770,7 
+770,7 @@ func TestICSPUpdate(t *testing.T) { glog.Info("Applying update") // Apply update - err = c.syncImgHandler("") + err = c.syncImgHandler(context.TODO(), "") if err != nil { t.Errorf("syncImgHandler returned: %v", err) } @@ -811,12 +811,12 @@ func TestRunImageBootstrap(t *testing.T) { // both registries.conf and policy.json as blocked imgCfg := newImageConfig("cluster", &apicfgv1.RegistrySources{InsecureRegistries: []string{"insecure-reg-1.io", "insecure-reg-2.io"}, BlockedRegistries: []string{"blocked-reg.io", "release-reg.io"}, ContainerRuntimeSearchRegistries: []string{"search-reg.io"}}) - mcs, err := RunImageBootstrap("../../../templates", cc, pools, icspRules, imgCfg) + mcs, err := RunImageBootstrap(context.TODO(), "../../../templates", cc, pools, icspRules, imgCfg) require.NoError(t, err) require.Len(t, mcs, len(pools)) for i := range pools { - keyReg, _ := getManagedKeyReg(pools[i], nil) + keyReg, _ := getManagedKeyReg(context.TODO(), pools[i], nil) verifyRegistriesConfigAndPolicyJSONContents(t, mcs[i], keyReg, imgCfg, icspRules, cc.Spec.ReleaseImage, true, true) } }) @@ -1076,7 +1076,7 @@ func TestCleanUpDuplicatedMC(t *testing.T) { require.NoError(t, err) require.Len(t, mcList.Items, 3) - ctrl.cleanUpDuplicatedMC() + ctrl.cleanUpDuplicatedMC(context.TODO()) // successful test: ony custom and upgraded MCs stay mcList, err = ctrl.client.MachineconfigurationV1().MachineConfigs().List(context.TODO(), metav1.ListOptions{}) require.NoError(t, err) @@ -1153,7 +1153,7 @@ func TestContainerruntimeConfigResync(t *testing.T) { ccr1 := newContainerRuntimeConfig("log-level-1", &mcfgv1.ContainerRuntimeConfiguration{LogLevel: "debug"}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, "pools.operator.machineconfiguration.openshift.io/master", "")) ccr2 := newContainerRuntimeConfig("log-level-2", &mcfgv1.ContainerRuntimeConfiguration{LogLevel: "debug"}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, "pools.operator.machineconfiguration.openshift.io/master", 
"")) - ctrConfigKey, _ := getManagedKeyCtrCfg(mcp, f.client, ccr1) + ctrConfigKey, _ := getManagedKeyCtrCfg(context.TODO(), mcp, f.client, ccr1) mcs := helpers.NewMachineConfig(ctrConfigKey, map[string]string{"node-role/master": ""}, "dummy://", []ign3types.File{{}}) mcsDeprecated := mcs.DeepCopy() mcsDeprecated.Name = getManagedKeyCtrCfgDeprecated(mcp) @@ -1165,7 +1165,7 @@ func TestContainerruntimeConfigResync(t *testing.T) { f.objects = append(f.objects, ccr1) c := f.newController() - err := c.syncHandler(getKey(ccr1, t)) + err := c.syncHandler(context.TODO(), getKey(ccr1, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } @@ -1174,7 +1174,7 @@ func TestContainerruntimeConfigResync(t *testing.T) { f.objects = append(f.objects, ccr2) c = f.newController() - err = c.syncHandler(getKey(ccr2, t)) + err = c.syncHandler(context.TODO(), getKey(ccr2, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } @@ -1187,7 +1187,7 @@ func TestContainerruntimeConfigResync(t *testing.T) { // resync kc1 and kc2 c = f.newController() - err = c.syncHandler(getKey(ccr1, t)) + err = c.syncHandler(context.TODO(), getKey(ccr1, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } @@ -1195,7 +1195,7 @@ func TestContainerruntimeConfigResync(t *testing.T) { require.Equal(t, "", val) c = f.newController() - err = c.syncHandler(getKey(ccr2, t)) + err = c.syncHandler(context.TODO(), getKey(ccr2, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } diff --git a/pkg/controller/container-runtime-config/helpers.go b/pkg/controller/container-runtime-config/helpers.go index f4d279d26a..e8a6b2a829 100644 --- a/pkg/controller/container-runtime-config/helpers.go +++ b/pkg/controller/container-runtime-config/helpers.go @@ -169,15 +169,15 @@ func getManagedKeyCtrCfgDeprecated(pool *mcfgv1.MachineConfigPool) string { } // nolint: dupl -func getManagedKeyCtrCfg(pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interface, cfg *mcfgv1.ContainerRuntimeConfig) 
(string, error) { +func getManagedKeyCtrCfg(ctx context.Context, pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interface, cfg *mcfgv1.ContainerRuntimeConfig) (string, error) { // Get all the ctrcfg CRs - ctrcfgListAll, err := client.MachineconfigurationV1().ContainerRuntimeConfigs().List(context.TODO(), metav1.ListOptions{}) + ctrcfgListAll, err := client.MachineconfigurationV1().ContainerRuntimeConfigs().List(ctx, metav1.ListOptions{}) if err != nil { return "", fmt.Errorf("error listing container runtime configs: %w", err) } // If there is no ctrcfg in the list, return the default MC name with no suffix if ctrcfgListAll == nil || len(ctrcfgListAll.Items) == 0 { - return ctrlcommon.GetManagedKey(pool, client, "99", "containerruntime", getManagedKeyCtrCfgDeprecated(pool)) + return ctrlcommon.GetManagedKey(ctx, pool, client, "99", "containerruntime", getManagedKeyCtrCfgDeprecated(pool)) } var ctrcfgList []mcfgv1.ContainerRuntimeConfig @@ -206,13 +206,13 @@ func getManagedKeyCtrCfg(pool *mcfgv1.MachineConfigPool, client mcfgclientset.In return fmt.Sprintf("99-%s-generated-containerruntime-%s", pool.Name, val), nil } // if the suffix val is "", mc name should not suffixed the cfg to be updated is the first containerruntime config has been created - return ctrlcommon.GetManagedKey(pool, client, "99", "containerruntime", getManagedKeyCtrCfgDeprecated(pool)) + return ctrlcommon.GetManagedKey(ctx, pool, client, "99", "containerruntime", getManagedKeyCtrCfgDeprecated(pool)) } // If we are here, this means that a new containerruntime config was created, so we have to calculate the suffix value for its MC name // if the containerruntime config is the only one in the list, mc name should not suffixed since cfg is the first containerruntime config to be created if len(ctrcfgList) == 1 { - return ctrlcommon.GetManagedKey(pool, client, "99", "containerruntime", getManagedKeyCtrCfgDeprecated(pool)) + return ctrlcommon.GetManagedKey(ctx, pool, client, "99", 
"containerruntime", getManagedKeyCtrCfgDeprecated(pool)) } suffixNum := 0 // Go through the list of ctrcfg objects created and get the max suffix value currently created @@ -246,8 +246,8 @@ func getManagedKeyRegDeprecated(pool *mcfgv1.MachineConfigPool) string { return fmt.Sprintf("99-%s-%s-registries", pool.Name, pool.ObjectMeta.UID) } -func getManagedKeyReg(pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interface) (string, error) { - return ctrlcommon.GetManagedKey(pool, client, "99", "registries", getManagedKeyRegDeprecated(pool)) +func getManagedKeyReg(ctx context.Context, pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interface) (string, error) { + return ctrlcommon.GetManagedKey(ctx, pool, client, "99", "registries", getManagedKeyRegDeprecated(pool)) } func wrapErrorWithCondition(err error, args ...interface{}) mcfgv1.ContainerRuntimeConfigCondition { From 448f3a9c1018c7fdbead9e0c2f79e5212da49eab Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:15:22 -0500 Subject: [PATCH 09/18] Context cancellation for drain controller This adds context cancellation to the functions in the drain controller. This favors explicitly passing the context as the first parameter so it is apparent that the function is cancellable. This also makes the existing context that was part of the controller struct "live" rather than a context.TODO() for cases where the function signature could not change due to interface constraints (like Informer event handlers), but that usage makes it less clear what is cancellable and what isn't, so I tried to avoid it unless absolutely necessary. 
--- pkg/controller/drain/drain_controller.go | 46 ++++++++++++++---------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/pkg/controller/drain/drain_controller.go b/pkg/controller/drain/drain_controller.go index fb8ca77a16..e78f7a7aaa 100644 --- a/pkg/controller/drain/drain_controller.go +++ b/pkg/controller/drain/drain_controller.go @@ -50,11 +50,13 @@ const ( // Controller defines the node controller. type Controller struct { + name string + client mcfgclientset.Interface kubeClient clientset.Interface eventRecorder record.EventRecorder - syncHandler func(node string) error + syncHandler func(ctx context.Context, node string) error enqueueNode func(*corev1.Node) nodeLister corelisterv1.NodeLister @@ -76,6 +78,7 @@ func New( eventBroadcaster.StartRecordingToSink(&coreclientsetv1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) ctrl := &Controller{ + name: "DrainController", client: mcfgClient, kubeClient: kubeClient, eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "machineconfigcontroller-nodecontroller"}), @@ -108,11 +111,11 @@ func (w writer) Write(p []byte) (n int, err error) { } // Run executes the drain controller. -func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) { +func (ctrl *Controller) Run(ctx context.Context, workers int) { defer utilruntime.HandleCrash() defer ctrl.queue.ShutDown() - if !cache.WaitForCacheSync(stopCh, ctrl.nodeListerSynced) { + if !cache.WaitForCacheSync(ctx.Done(), ctrl.nodeListerSynced) { return } @@ -129,10 +132,10 @@ func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) { defer glog.Info("Shutting down MachineConfigController-DrainController") for i := 0; i < workers; i++ { - go wait.Until(ctrl.worker, time.Second, stopCh) + go wait.UntilWithContext(ctx, ctrl.worker, time.Second) } - <-stopCh + <-ctx.Done() } // logNode emits a log message at informational level, prefixed with the node name in a consistent fashion. 
@@ -185,19 +188,19 @@ func (ctrl *Controller) enqueueDefault(node *corev1.Node) { // worker runs a worker thread that just dequeues items, processes them, and marks them done. // It enforces that the syncHandler is never invoked concurrently with the same key. -func (ctrl *Controller) worker() { - for ctrl.processNextWorkItem() { +func (ctrl *Controller) worker(ctx context.Context) { + for ctrl.processNextWorkItem(ctx) { } } -func (ctrl *Controller) processNextWorkItem() bool { +func (ctrl *Controller) processNextWorkItem(ctx context.Context) bool { key, quit := ctrl.queue.Get() if quit { return false } defer ctrl.queue.Done(key) - err := ctrl.syncHandler(key.(string)) + err := ctrl.syncHandler(ctx, key.(string)) ctrl.handleErr(err, key) return true @@ -221,7 +224,7 @@ func (ctrl *Controller) handleErr(err error, key interface{}) { ctrl.queue.AddAfter(key, 1*time.Minute) } -func (ctrl *Controller) syncNode(key string) error { +func (ctrl *Controller) syncNode(ctx context.Context, key string) error { startTime := time.Now() glog.V(4).Infof("Started syncing node %q (%v)", key, startTime) defer func() { @@ -273,7 +276,7 @@ func (ctrl *Controller) syncNode(key string) error { }, Out: writer{glog.Info}, ErrOut: writer{glog.Error}, - Ctx: context.TODO(), + Ctx: ctx, } desiredVerb := strings.Split(desiredState, "-")[0] @@ -281,7 +284,7 @@ func (ctrl *Controller) syncNode(key string) error { case daemonconsts.DrainerStateUncordon: ctrl.logNode(node, "uncordoning") // perform uncordon - if err := ctrl.cordonOrUncordonNode(false, node, drainer); err != nil { + if err := ctrl.cordonOrUncordonNode(ctx, false, node, drainer); err != nil { return fmt.Errorf("failed to uncordon node %v: %w", node.Name, err) } case daemonconsts.DrainerStateDrain: @@ -309,7 +312,7 @@ func (ctrl *Controller) syncNode(key string) error { if !ongoingDrain { ctrl.logNode(node, "cordoning") // perform cordon - if err := ctrl.cordonOrUncordonNode(true, node, drainer); err != nil { + if err := 
ctrl.cordonOrUncordonNode(ctx, true, node, drainer); err != nil { return fmt.Errorf("node %s: failed to cordon: %w", node.Name, err) } ctrl.ongoingDrains[node.Name] = time.Now() @@ -334,17 +337,17 @@ func (ctrl *Controller) syncNode(key string) error { annotations := map[string]string{ daemonconsts.LastAppliedDrainerAnnotationKey: desiredState, } - if err := ctrl.setNodeAnnotations(node.Name, annotations); err != nil { + if err := ctrl.setNodeAnnotations(ctx, node.Name, annotations); err != nil { return fmt.Errorf("node %s: failed to set node uncordoned annotation: %w", node.Name, err) } return nil } -func (ctrl *Controller) setNodeAnnotations(nodeName string, annotations map[string]string) error { +func (ctrl *Controller) setNodeAnnotations(ctx context.Context, nodeName string, annotations map[string]string) error { // TODO dedupe if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - n, err := ctrl.kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + n, err := ctrl.kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) if err != nil { return err } @@ -368,7 +371,7 @@ func (ctrl *Controller) setNodeAnnotations(nodeName string, annotations map[stri return fmt.Errorf("node %s: failed to create patch for: %w", nodeName, err) } - _, err = ctrl.kubeClient.CoreV1().Nodes().Patch(context.TODO(), nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) + _, err = ctrl.kubeClient.CoreV1().Nodes().Patch(ctx, nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) return err }); err != nil { // may be conflict if max retries were hit @@ -377,7 +380,7 @@ func (ctrl *Controller) setNodeAnnotations(nodeName string, annotations map[stri return nil } -func (ctrl *Controller) cordonOrUncordonNode(desired bool, node *corev1.Node, drainer *drain.Helper) error { +func (ctrl *Controller) cordonOrUncordonNode(ctx context.Context, desired bool, node *corev1.Node, drainer *drain.Helper) error 
{ // Copied over from daemon // TODO this code has some sync issues verb := "cordon" @@ -404,7 +407,7 @@ func (ctrl *Controller) cordonOrUncordonNode(desired bool, node *corev1.Node, dr // Re-fetch node so that we are not using cached information var updatedNode *corev1.Node - if updatedNode, err = ctrl.kubeClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}); err != nil { + if updatedNode, err = ctrl.kubeClient.CoreV1().Nodes().Get(ctx, node.Name, metav1.GetOptions{}); err != nil { lastErr = err glog.Errorf("Failed to fetch node %v, retrying", err) return false, nil @@ -428,3 +431,8 @@ func (ctrl *Controller) cordonOrUncordonNode(desired bool, node *corev1.Node, dr return nil } + +// Name() returns the name of this controller +func (ctrl *Controller) Name() string { + return ctrl.name +} From c411d696948472e6a82c3da676e91de245f13ff4 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:19:01 -0500 Subject: [PATCH 10/18] Context cancellation for kubelet config controller This adds context cancellation to the functions in the kubelet config controller. This favors explicitly passing the context as the first parameter so it is apparent that the function is cancellable. This also adds a private context to the controller struct for cases where the function signature could not change due to interface constraints (like Informer event handlers), but that usage makes it less clear what is cancellable and what isn't, so I tried to avoid it unless absolutely necessary. 
--- pkg/controller/kubelet-config/helpers.go | 18 +-- .../kubelet_config_bootstrap.go | 9 +- .../kubelet_config_bootstrap_test.go | 13 +- .../kubelet_config_controller.go | 117 ++++++++++-------- .../kubelet_config_controller_test.go | 31 ++--- .../kubelet-config/kubelet_config_features.go | 22 ++-- .../kubelet_config_features_test.go | 13 +- .../kubelet-config/kubelet_config_nodes.go | 24 ++-- .../kubelet_config_nodes_test.go | 7 +- 9 files changed, 136 insertions(+), 118 deletions(-) diff --git a/pkg/controller/kubelet-config/helpers.go b/pkg/controller/kubelet-config/helpers.go index c7952eaf0c..b1ad231826 100644 --- a/pkg/controller/kubelet-config/helpers.go +++ b/pkg/controller/kubelet-config/helpers.go @@ -131,16 +131,16 @@ func findKubeletConfig(mc *mcfgv1.MachineConfig) (*ign3types.File, error) { } // nolint: dupl -func getManagedKubeletConfigKey(pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interface, cfg *mcfgv1.KubeletConfig) (string, error) { +func getManagedKubeletConfigKey(ctx context.Context, pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interface, cfg *mcfgv1.KubeletConfig) (string, error) { // Get all the kubelet config CRs - kcListAll, err := client.MachineconfigurationV1().KubeletConfigs().List(context.TODO(), metav1.ListOptions{}) + kcListAll, err := client.MachineconfigurationV1().KubeletConfigs().List(ctx, metav1.ListOptions{}) if err != nil { return "", fmt.Errorf("error listing kubelet configs: %w", err) } // If there is no kubelet config in the list, return the default MC name with no suffix if kcListAll == nil || len(kcListAll.Items) == 0 { - return ctrlcommon.GetManagedKey(pool, client, "99", "kubelet", getManagedKubeletConfigKeyDeprecated(pool)) + return ctrlcommon.GetManagedKey(ctx, pool, client, "99", "kubelet", getManagedKubeletConfigKeyDeprecated(pool)) } var kcList []mcfgv1.KubeletConfig @@ -169,13 +169,13 @@ func getManagedKubeletConfigKey(pool *mcfgv1.MachineConfigPool, client mcfgclien return 
fmt.Sprintf("99-%s-generated-kubelet-%s", pool.Name, val), nil } // if the suffix val is "", mc name should not suffixed the cfg to be updated is the first kubelet config has been created - return ctrlcommon.GetManagedKey(pool, client, "99", "kubelet", getManagedKubeletConfigKeyDeprecated(pool)) + return ctrlcommon.GetManagedKey(ctx, pool, client, "99", "kubelet", getManagedKubeletConfigKeyDeprecated(pool)) } // If we are here, this means that a new kubelet config was created, so we have to calculate the suffix value for its MC name // if the kubelet config is the only one in the list, mc name should not suffixed since cfg is the first kubelet config to be created if len(kcList) == 1 { - return ctrlcommon.GetManagedKey(pool, client, "99", "kubelet", getManagedKubeletConfigKeyDeprecated(pool)) + return ctrlcommon.GetManagedKey(ctx, pool, client, "99", "kubelet", getManagedKubeletConfigKeyDeprecated(pool)) } suffixNum := 0 // Go through the list of kubelet config objects created and get the max suffix value currently created @@ -204,8 +204,8 @@ func getManagedKubeletConfigKey(pool *mcfgv1.MachineConfigPool, client mcfgclien return fmt.Sprintf("99-%s-generated-kubelet-%s", pool.Name, strconv.Itoa(suffixNum+1)), nil } -func getManagedFeaturesKey(pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interface) (string, error) { - return ctrlcommon.GetManagedKey(pool, client, "98", "kubelet", getManagedFeaturesKeyDeprecated(pool)) +func getManagedFeaturesKey(ctx context.Context, pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interface) (string, error) { + return ctrlcommon.GetManagedKey(ctx, pool, client, "98", "kubelet", getManagedFeaturesKeyDeprecated(pool)) } // Deprecated: use getManagedFeaturesKey @@ -213,8 +213,8 @@ func getManagedFeaturesKeyDeprecated(pool *mcfgv1.MachineConfigPool) string { return fmt.Sprintf("98-%s-%s-kubelet", pool.Name, pool.ObjectMeta.UID) } -func getManagedNodeConfigKey(pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interface) 
(string, error) { - return ctrlcommon.GetManagedKey(pool, client, "97", "kubelet", fmt.Sprintf("97-%s-%s-kubelet", pool.Name, pool.ObjectMeta.UID)) +func getManagedNodeConfigKey(ctx context.Context, pool *mcfgv1.MachineConfigPool, client mcfgclientset.Interface) (string, error) { + return ctrlcommon.GetManagedKey(ctx, pool, client, "97", "kubelet", fmt.Sprintf("97-%s-%s-kubelet", pool.Name, pool.ObjectMeta.UID)) } // Deprecated: use getManagedKubeletConfigKey diff --git a/pkg/controller/kubelet-config/kubelet_config_bootstrap.go b/pkg/controller/kubelet-config/kubelet_config_bootstrap.go index 271663324a..69eebc53a3 100644 --- a/pkg/controller/kubelet-config/kubelet_config_bootstrap.go +++ b/pkg/controller/kubelet-config/kubelet_config_bootstrap.go @@ -1,6 +1,7 @@ package kubeletconfig import ( + "context" "encoding/json" "fmt" @@ -13,7 +14,7 @@ import ( ) // RunKubeletBootstrap generates MachineConfig objects for mcpPools that would have been generated by syncKubeletConfig -func RunKubeletBootstrap(templateDir string, kubeletConfigs []*mcfgv1.KubeletConfig, controllerConfig *mcfgv1.ControllerConfig, features *configv1.FeatureGate, nodeConfig *configv1.Node, mcpPools []*mcfgv1.MachineConfigPool) ([]*mcfgv1.MachineConfig, error) { +func RunKubeletBootstrap(ctx context.Context, templateDir string, kubeletConfigs []*mcfgv1.KubeletConfig, controllerConfig *mcfgv1.ControllerConfig, features *configv1.FeatureGate, nodeConfig *configv1.Node, mcpPools []*mcfgv1.MachineConfigPool) ([]*mcfgv1.MachineConfig, error) { var res []*mcfgv1.MachineConfig managedKeyExist := make(map[string]bool) // Validate the KubeletConfig CR if exists @@ -76,7 +77,7 @@ func RunKubeletBootstrap(templateDir string, kubeletConfigs []*mcfgv1.KubeletCon if err != nil { return nil, err } - managedKey, err := generateBootstrapManagedKeyKubelet(pool, managedKeyExist) + managedKey, err := generateBootstrapManagedKeyKubelet(ctx, pool, managedKeyExist) if err != nil { return nil, err } @@ -109,11 +110,11 @@ 
func RunKubeletBootstrap(templateDir string, kubeletConfigs []*mcfgv1.KubeletCon // Note: Only one kubeletconfig manifest per pool is allowed for bootstrap mode for the following reason: // if you provide multiple per pool, they would overwrite each other and not merge, potentially confusing customers post install; // we can simplify the logic for the bootstrap generation and avoid some edge cases. -func generateBootstrapManagedKeyKubelet(pool *mcfgv1.MachineConfigPool, managedKeyExist map[string]bool) (string, error) { +func generateBootstrapManagedKeyKubelet(ctx context.Context, pool *mcfgv1.MachineConfigPool, managedKeyExist map[string]bool) (string, error) { if _, ok := managedKeyExist[pool.Name]; ok { return "", fmt.Errorf("Error found multiple KubeletConfigs targeting MachineConfigPool %v. Please apply only one KubeletConfig manifest for each pool during installation", pool.Name) } - managedKey, err := ctrlcommon.GetManagedKey(pool, nil, "99", "kubelet", "") + managedKey, err := ctrlcommon.GetManagedKey(ctx, pool, nil, "99", "kubelet", "") if err != nil { return "", err } diff --git a/pkg/controller/kubelet-config/kubelet_config_bootstrap_test.go b/pkg/controller/kubelet-config/kubelet_config_bootstrap_test.go index 20c402b358..9c4acd5e15 100644 --- a/pkg/controller/kubelet-config/kubelet_config_bootstrap_test.go +++ b/pkg/controller/kubelet-config/kubelet_config_bootstrap_test.go @@ -1,6 +1,7 @@ package kubeletconfig import ( + "context" "fmt" "testing" @@ -74,7 +75,7 @@ func TestRunKubeletBootstrap(t *testing.T) { }, } - mcs, err := RunKubeletBootstrap("../../../templates", cfgs, cc, nil, nil, pools) + mcs, err := RunKubeletBootstrap(context.TODO(), "../../../templates", cfgs, cc, nil, nil, pools) require.NoError(t, err) require.Len(t, mcs, len(cfgs)) @@ -135,7 +136,7 @@ func TestGenerateDefaultManagedKeyKubelet(t *testing.T) { "99-master-generated-kubelet", // kubeletconfig apply to master pool, expected managedKey for master pool }, } { - res, err := 
generateBootstrapManagedKeyKubelet(tc.pool, managedKeyExist) + res, err := generateBootstrapManagedKeyKubelet(context.TODO(), tc.pool, managedKeyExist) require.NoError(t, err) require.Equal(t, tc.expectedManagedKey, res) } @@ -188,7 +189,7 @@ func TestGenerateDefaultManagedKeyKubelet(t *testing.T) { fmt.Errorf("Error found multiple KubeletConfigs targeting MachineConfigPool master. Please apply only one KubeletConfig manifest for each pool during installation"), }, } { - res, err := generateBootstrapManagedKeyKubelet(tc.pool, managedKeyExist) + res, err := generateBootstrapManagedKeyKubelet(context.TODO(), tc.pool, managedKeyExist) require.Equal(t, tc.expectedErr, err) require.Equal(t, tc.expectedManagedKey, res) } @@ -212,7 +213,7 @@ func TestAddKubeletCfgAfterBootstrapKubeletCfg(t *testing.T) { f.mckLister = append(f.mckLister, kc) f.objects = append(f.objects, kc) - mcs, err := RunKubeletBootstrap("../../../templates", []*mcfgv1.KubeletConfig{kc}, cc, nil, nil, pools) + mcs, err := RunKubeletBootstrap(context.TODO(), "../../../templates", []*mcfgv1.KubeletConfig{kc}, cc, nil, nil, pools) require.NoError(t, err) require.Len(t, mcs, 1) @@ -222,14 +223,14 @@ func TestAddKubeletCfgAfterBootstrapKubeletCfg(t *testing.T) { f.mckLister = append(f.mckLister, kc1) f.objects = append(f.objects, kc1) c := f.newController() - err = c.syncHandler(getKey(kc1, t)) + err = c.syncHandler(context.TODO(), getKey(kc1, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } // resync kc and check the managedKey c = f.newController() - err = c.syncHandler(getKey(kc, t)) + err = c.syncHandler(context.TODO(), getKey(kc, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } diff --git a/pkg/controller/kubelet-config/kubelet_config_controller.go b/pkg/controller/kubelet-config/kubelet_config_controller.go index a077c7d05b..5ae16f48c1 100644 --- a/pkg/controller/kubelet-config/kubelet_config_controller.go +++ b/pkg/controller/kubelet-config/kubelet_config_controller.go 
@@ -73,13 +73,15 @@ var errCouldNotFindMCPSet = errors.New("could not find any MachineConfigPool set // Controller defines the kubelet config controller. type Controller struct { + name string + templatesDir string client mcfgclientset.Interface configClient configclientset.Interface eventRecorder record.EventRecorder - syncHandler func(mcp string) error + syncHandler func(ctx context.Context, mcp string) error enqueueKubeletConfig func(*mcfgv1.KubeletConfig) ccLister mcfglistersv1.ControllerConfigLister @@ -103,6 +105,8 @@ type Controller struct { queue workqueue.RateLimitingInterface featureQueue workqueue.RateLimitingInterface nodeConfigQueue workqueue.RateLimitingInterface + + ctx context.Context } // New returns a new kubelet config controller @@ -118,11 +122,13 @@ func New( mcfgClient mcfgclientset.Interface, configclient configclientset.Interface, ) *Controller { + eventBroadcaster := record.NewBroadcaster() eventBroadcaster.StartLogging(glog.Infof) eventBroadcaster.StartRecordingToSink(&coreclientsetv1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) ctrl := &Controller{ + name: "KubeletConfigController", templatesDir: templatesDir, client: mcfgClient, configClient: configclient, @@ -175,13 +181,14 @@ func New( } // Run executes the kubelet config controller. 
-func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) { +func (ctrl *Controller) Run(ctx context.Context, workers int) { defer utilruntime.HandleCrash() defer ctrl.queue.ShutDown() defer ctrl.featureQueue.ShutDown() defer ctrl.nodeConfigQueue.ShutDown() + ctrl.ctx = ctx - if !cache.WaitForCacheSync(stopCh, ctrl.mcpListerSynced, ctrl.mckListerSynced, ctrl.ccListerSynced, ctrl.featListerSynced, ctrl.apiserverListerSynced) { + if !cache.WaitForCacheSync(ctx.Done(), ctrl.mcpListerSynced, ctrl.mckListerSynced, ctrl.ccListerSynced, ctrl.featListerSynced, ctrl.apiserverListerSynced) { return } @@ -189,18 +196,18 @@ func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) { defer glog.Info("Shutting down MachineConfigController-KubeletConfigController") for i := 0; i < workers; i++ { - go wait.Until(ctrl.worker, time.Second, stopCh) + go wait.UntilWithContext(ctx, ctrl.worker, time.Second) } for i := 0; i < workers; i++ { - go wait.Until(ctrl.featureWorker, time.Second, stopCh) + go wait.UntilWithContext(ctx, ctrl.featureWorker, time.Second) } for i := 0; i < workers; i++ { - go wait.Until(ctrl.nodeConfigWorker, time.Second, stopCh) + go wait.UntilWithContext(ctx, ctrl.nodeConfigWorker, time.Second) } - <-stopCh + <-ctx.Done() } func kubeletConfigTriggerObjectChange(old, new *mcfgv1.KubeletConfig) bool { @@ -243,32 +250,32 @@ func (ctrl *Controller) deleteKubeletConfig(obj interface{}) { return } } - if err := ctrl.cascadeDelete(cfg); err != nil { + if err := ctrl.cascadeDelete(ctrl.ctx, cfg); err != nil { utilruntime.HandleError(fmt.Errorf("couldn't delete object %#v: %w", cfg, err)) } else { glog.V(4).Infof("Deleted KubeletConfig %s and restored default config", cfg.Name) } } -func (ctrl *Controller) cascadeDelete(cfg *mcfgv1.KubeletConfig) error { +func (ctrl *Controller) cascadeDelete(ctx context.Context, cfg *mcfgv1.KubeletConfig) error { if len(cfg.GetFinalizers()) == 0 { return nil } finalizerName := cfg.GetFinalizers()[0] - mcs, err := 
ctrl.client.MachineconfigurationV1().MachineConfigs().List(context.TODO(), metav1.ListOptions{}) + mcs, err := ctrl.client.MachineconfigurationV1().MachineConfigs().List(ctx, metav1.ListOptions{}) if err != nil { return err } for _, mc := range mcs.Items { if string(mc.ObjectMeta.GetUID()) == finalizerName || mc.GetName() == finalizerName { - err := ctrl.client.MachineconfigurationV1().MachineConfigs().Delete(context.TODO(), mc.GetName(), metav1.DeleteOptions{}) + err := ctrl.client.MachineconfigurationV1().MachineConfigs().Delete(ctx, mc.GetName(), metav1.DeleteOptions{}) if err != nil && !macherrors.IsNotFound(err) { return err } break } } - if err := ctrl.popFinalizerFromKubeletConfig(cfg); err != nil { + if err := ctrl.popFinalizerFromKubeletConfig(ctx, cfg); err != nil { return err } return nil @@ -294,19 +301,19 @@ func (ctrl *Controller) enqueueRateLimited(cfg *mcfgv1.KubeletConfig) { // worker runs a worker thread that just dequeues items, processes them, and marks them done. // It enforces that the syncHandler is never invoked concurrently with the same key. 
-func (ctrl *Controller) worker() { - for ctrl.processNextWorkItem() { +func (ctrl *Controller) worker(ctx context.Context) { + for ctrl.processNextWorkItem(ctx) { } } -func (ctrl *Controller) processNextWorkItem() bool { +func (ctrl *Controller) processNextWorkItem(ctx context.Context) bool { key, quit := ctrl.queue.Get() if quit { return false } defer ctrl.queue.Done(key) - err := ctrl.syncHandler(key.(string)) + err := ctrl.syncHandler(ctx, key.(string)) ctrl.handleErr(err, key) return true @@ -404,7 +411,7 @@ func generateOriginalKubeletConfigIgn(cc *mcfgv1.ControllerConfig, templatesDir, return nil, fmt.Errorf("could not generate old kubelet config") } -func (ctrl *Controller) syncStatusOnly(cfg *mcfgv1.KubeletConfig, err error, args ...interface{}) error { +func (ctrl *Controller) syncStatusOnly(ctx context.Context, cfg *mcfgv1.KubeletConfig, err error, args ...interface{}) error { statusUpdateError := retry.RetryOnConflict(updateBackoff, func() error { newcfg, getErr := ctrl.mckLister.Get(cfg.Name) if getErr != nil { @@ -417,7 +424,7 @@ func (ctrl *Controller) syncStatusOnly(cfg *mcfgv1.KubeletConfig, err error, arg // reflect the latest time stamp from the new status message. newStatusCondition := wrapErrorWithCondition(err, args...) 
cleanUpStatusConditions(&newcfg.Status.Conditions, newStatusCondition) - _, lerr := ctrl.client.MachineconfigurationV1().KubeletConfigs().UpdateStatus(context.TODO(), newcfg, metav1.UpdateOptions{}) + _, lerr := ctrl.client.MachineconfigurationV1().KubeletConfigs().UpdateStatus(ctx, newcfg, metav1.UpdateOptions{}) return lerr }) if statusUpdateError != nil { @@ -441,7 +448,7 @@ func cleanUpStatusConditions(statusConditions *[]mcfgv1.KubeletConfigCondition, } // addAnnotation adds the annotions for a kubeletconfig object with the given annotationKey and annotationVal -func (ctrl *Controller) addAnnotation(cfg *mcfgv1.KubeletConfig, annotationKey, annotationVal string) error { +func (ctrl *Controller) addAnnotation(ctx context.Context, cfg *mcfgv1.KubeletConfig, annotationKey, annotationVal string) error { annotationUpdateErr := retry.RetryOnConflict(updateBackoff, func() error { newcfg, getErr := ctrl.mckLister.Get(cfg.Name) if getErr != nil { @@ -450,7 +457,7 @@ func (ctrl *Controller) addAnnotation(cfg *mcfgv1.KubeletConfig, annotationKey, newcfg.SetAnnotations(map[string]string{ annotationKey: annotationVal, }) - _, updateErr := ctrl.client.MachineconfigurationV1().KubeletConfigs().Update(context.TODO(), newcfg, metav1.UpdateOptions{}) + _, updateErr := ctrl.client.MachineconfigurationV1().KubeletConfigs().Update(ctx, newcfg, metav1.UpdateOptions{}) return updateErr }) if annotationUpdateErr != nil { @@ -462,7 +469,7 @@ func (ctrl *Controller) addAnnotation(cfg *mcfgv1.KubeletConfig, annotationKey, // syncKubeletConfig will sync the kubeletconfig with the given key. // This function is not meant to be invoked concurrently with the same key. 
//nolint:gocyclo -func (ctrl *Controller) syncKubeletConfig(key string) error { +func (ctrl *Controller) syncKubeletConfig(ctx context.Context, key string) error { startTime := time.Now() glog.V(4).Infof("Started syncing kubeletconfig %q (%v)", key, startTime) defer func() { @@ -495,7 +502,7 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { // Check for Deleted KubeletConfig and optionally delete finalizers if cfg.DeletionTimestamp != nil { if len(cfg.GetFinalizers()) > 0 { - return ctrl.cascadeDelete(cfg) + return ctrl.cascadeDelete(ctx, cfg) } return nil } @@ -507,19 +514,19 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { // Validate the KubeletConfig CR if err := validateUserKubeletConfig(cfg); err != nil { - return ctrl.syncStatusOnly(cfg, newForgetError(err)) + return ctrl.syncStatusOnly(ctx, cfg, newForgetError(err)) } // Find all MachineConfigPools mcpPools, err := ctrl.getPoolsForKubeletConfig(cfg) if err != nil { - return ctrl.syncStatusOnly(cfg, err) + return ctrl.syncStatusOnly(ctx, cfg, err) } if len(mcpPools) == 0 { err := fmt.Errorf("KubeletConfig %v does not match any MachineConfigPools", key) glog.V(2).Infof("%v", err) - return ctrl.syncStatusOnly(cfg, err) + return ctrl.syncStatusOnly(ctx, cfg, err) } features, err := ctrl.featLister.Get(ctrlcommon.ClusterFeatureInstanceName) @@ -528,7 +535,7 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { } else if err != nil { glog.V(2).Infof("%v", err) err := fmt.Errorf("could not fetch FeatureGates: %w", err) - return ctrl.syncStatusOnly(cfg, err) + return ctrl.syncStatusOnly(ctx, cfg, err) } // Fetch the NodeConfig @@ -537,7 +544,7 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { nodeConfig = createNewDefaultNodeconfig() } else if err != nil { err := fmt.Errorf("could not fetch Node: %v", err) - return ctrl.syncStatusOnly(cfg, err) + return ctrl.syncStatusOnly(ctx, cfg, err) } for _, pool := range mcpPools { @@ -551,13 +558,13 @@ func (ctrl 
*Controller) syncKubeletConfig(key string) error { } role := pool.Name // Get MachineConfig - managedKey, err := getManagedKubeletConfigKey(pool, ctrl.client, cfg) + managedKey, err := getManagedKubeletConfigKey(ctx, pool, ctrl.client, cfg) if err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not get kubelet config key: %v", err) + return ctrl.syncStatusOnly(ctx, cfg, err, "could not get kubelet config key: %v", err) } - mc, err := ctrl.client.MachineconfigurationV1().MachineConfigs().Get(context.TODO(), managedKey, metav1.GetOptions{}) + mc, err := ctrl.client.MachineconfigurationV1().MachineConfigs().Get(ctx, managedKey, metav1.GetOptions{}) if err != nil && !macherrors.IsNotFound(err) { - return ctrl.syncStatusOnly(cfg, err, "could not find MachineConfig: %v", managedKey) + return ctrl.syncStatusOnly(ctx, cfg, err, "could not find MachineConfig: %v", managedKey) } isNotFound := macherrors.IsNotFound(err) @@ -569,7 +576,7 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { originalKubeConfig, err := generateOriginalKubeletConfigWithFeatureGates(cc, ctrl.templatesDir, role, features) if err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not get original kubelet config: %v", err) + return ctrl.syncStatusOnly(ctx, cfg, err, "could not get original kubelet config: %v", err) } // updating the originalKubeConfig based on the nodeConfig on a worker node if role == ctrlcommon.MachineConfigPoolWorker { @@ -580,7 +587,7 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { var profile *configv1.TLSSecurityProfile if apiServerSettings, err := ctrl.apiserverLister.Get(defaultOpenshiftTLSSecurityProfileConfig); err != nil { if !macherrors.IsNotFound(err) { - return ctrl.syncStatusOnly(cfg, err, "could not get the TLSSecurityProfile from %v: %v", defaultOpenshiftTLSSecurityProfileConfig, err) + return ctrl.syncStatusOnly(ctx, cfg, err, "could not get the TLSSecurityProfile from %v: %v", defaultOpenshiftTLSSecurityProfileConfig, err) } } 
else { profile = apiServerSettings.Spec.TLSSecurityProfile @@ -595,14 +602,14 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { kubeletIgnition, logLevelIgnition, autoSizingReservedIgnition, err := generateKubeletIgnFiles(cfg, originalKubeConfig) if err != nil { - return ctrl.syncStatusOnly(cfg, err) + return ctrl.syncStatusOnly(ctx, cfg, err) } if isNotFound { ignConfig := ctrlcommon.NewIgnConfig() mc, err = ctrlcommon.MachineConfigFromIgnConfig(role, managedKey, ignConfig) if err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not create MachineConfig from new Ignition config: %v", err) + return ctrl.syncStatusOnly(ctx, cfg, err, "could not create MachineConfig from new Ignition config: %v", err) } mc.ObjectMeta.UID = uuid.NewUUID() _, ok := cfg.GetAnnotations()[ctrlcommon.MCNameSuffixAnnotationKey] @@ -610,8 +617,8 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { // the first managed key value 99-poolname-generated-kubelet does not have a suffix // set "" as suffix annotation to the kubelet config object if _, err := strconv.Atoi(arr[len(arr)-1]); err != nil && !ok { - if err := ctrl.addAnnotation(cfg, ctrlcommon.MCNameSuffixAnnotationKey, ""); err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not update annotation for kubeletConfig") + if err := ctrl.addAnnotation(ctx, cfg, ctrlcommon.MCNameSuffixAnnotationKey, ""); err != nil { + return ctrl.syncStatusOnly(ctx, cfg, err, "could not update annotation for kubeletConfig") } } // If the MC name suffix annotation does not exist and the managed key value returned has a suffix, then add the MC name @@ -619,8 +626,8 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { if len(arr) > 4 && !ok { _, err := strconv.Atoi(arr[len(arr)-1]) if err == nil { - if err := ctrl.addAnnotation(cfg, ctrlcommon.MCNameSuffixAnnotationKey, arr[len(arr)-1]); err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not update annotation for kubeletConfig") + if err := 
ctrl.addAnnotation(ctx, cfg, ctrlcommon.MCNameSuffixAnnotationKey, arr[len(arr)-1]); err != nil { + return ctrl.syncStatusOnly(ctx, cfg, err, "could not update annotation for kubeletConfig") } } } @@ -639,7 +646,7 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { rawIgn, err := json.Marshal(tempIgnConfig) if err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not marshal kubelet config Ignition: %v", err) + return ctrl.syncStatusOnly(ctx, cfg, err, "could not marshal kubelet config Ignition: %v", err) } mc.Spec.Config.Raw = rawIgn @@ -653,24 +660,25 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { if err := retry.RetryOnConflict(updateBackoff, func() error { var err error if isNotFound { - _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Create(context.TODO(), mc, metav1.CreateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Create(ctx, mc, metav1.CreateOptions{}) } else { - _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Update(context.TODO(), mc, metav1.UpdateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Update(ctx, mc, metav1.UpdateOptions{}) } return err }); err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not Create/Update MachineConfig: %v", err) + return ctrl.syncStatusOnly(ctx, cfg, err, "could not Create/Update MachineConfig: %v", err) } // Add Finalizers to the KubletConfig - if err := ctrl.addFinalizerToKubeletConfig(cfg, mc); err != nil { - return ctrl.syncStatusOnly(cfg, err, "could not add finalizers to KubeletConfig: %v", err) + if err := ctrl.addFinalizerToKubeletConfig(ctx, cfg, mc); err != nil { + return ctrl.syncStatusOnly(ctx, cfg, err, "could not add finalizers to KubeletConfig: %v", err) } glog.Infof("Applied KubeletConfig %v on MachineConfigPool %v", key, pool.Name) } - return ctrl.syncStatusOnly(cfg, nil) + return ctrl.syncStatusOnly(ctx, cfg, nil) } -func (ctrl *Controller) popFinalizerFromKubeletConfig(kc 
*mcfgv1.KubeletConfig) error { +func (ctrl *Controller) popFinalizerFromKubeletConfig(ctx context.Context, kc *mcfgv1.KubeletConfig) error { + // TODO(jkyros): what about here? This doesn't support a context for cancelling this directly, return retry.RetryOnConflict(updateBackoff, func() error { newcfg, err := ctrl.mckLister.Get(kc.Name) if macherrors.IsNotFound(err) { @@ -697,16 +705,16 @@ func (ctrl *Controller) popFinalizerFromKubeletConfig(kc *mcfgv1.KubeletConfig) if err != nil { return err } - return ctrl.patchKubeletConfigs(newcfg.Name, patch) + return ctrl.patchKubeletConfigs(ctx, newcfg.Name, patch) }) } -func (ctrl *Controller) patchKubeletConfigs(name string, patch []byte) error { - _, err := ctrl.client.MachineconfigurationV1().KubeletConfigs().Patch(context.TODO(), name, types.MergePatchType, patch, metav1.PatchOptions{}) +func (ctrl *Controller) patchKubeletConfigs(ctx context.Context, name string, patch []byte) error { + _, err := ctrl.client.MachineconfigurationV1().KubeletConfigs().Patch(ctx, name, types.MergePatchType, patch, metav1.PatchOptions{}) return err } -func (ctrl *Controller) addFinalizerToKubeletConfig(kc *mcfgv1.KubeletConfig, mc *mcfgv1.MachineConfig) error { +func (ctrl *Controller) addFinalizerToKubeletConfig(ctx context.Context, kc *mcfgv1.KubeletConfig, mc *mcfgv1.MachineConfig) error { return retry.RetryOnConflict(updateBackoff, func() error { newcfg, err := ctrl.mckLister.Get(kc.Name) if macherrors.IsNotFound(err) { @@ -748,7 +756,7 @@ func (ctrl *Controller) addFinalizerToKubeletConfig(kc *mcfgv1.KubeletConfig, mc if err != nil { return err } - return ctrl.patchKubeletConfigs(newcfg.Name, patch) + return ctrl.patchKubeletConfigs(ctx, newcfg.Name, patch) }) } @@ -807,3 +815,8 @@ func getSecurityProfileCiphers(profile *configv1.TLSSecurityProfile) (string, [] // need to remap all Ciphers to their respective IANA names used by Go return string(profileSpec.MinTLSVersion), crypto.OpenSSLToIANACipherSuites(profileSpec.Ciphers) } + 
+// Name() returns the name of this controller +func (ctrl *Controller) Name() string { + return ctrl.name +} diff --git a/pkg/controller/kubelet-config/kubelet_config_controller_test.go b/pkg/controller/kubelet-config/kubelet_config_controller_test.go index b9822e4a89..b7ba2dc862 100644 --- a/pkg/controller/kubelet-config/kubelet_config_controller_test.go +++ b/pkg/controller/kubelet-config/kubelet_config_controller_test.go @@ -1,6 +1,7 @@ package kubeletconfig import ( + "context" "fmt" "reflect" "testing" @@ -253,7 +254,7 @@ func (f *fixture) runExpectError(mcpname string) { func (f *fixture) runController(mcpname string, expectError bool) { c := f.newController() - err := c.syncHandler(mcpname) + err := c.syncHandler(context.TODO(), mcpname) if !expectError && err != nil { f.t.Errorf("error syncing kubeletconfigs: %v", err) } else if expectError && err == nil { @@ -266,7 +267,7 @@ func (f *fixture) runController(mcpname string, expectError bool) { func (f *fixture) runFeatureController(featname string, expectError bool) { c := f.newController() - err := c.syncFeatureHandler(featname) + err := c.syncFeatureHandler(context.TODO(), featname) if !expectError && err != nil { f.t.Errorf("error syncing kubeletconfigs: %v", err) } else if expectError && err == nil { @@ -278,7 +279,7 @@ func (f *fixture) runFeatureController(featname string, expectError bool) { func (f *fixture) runNodeController(nodename string, expectError bool) { c := f.newController() - err := c.syncNodeConfigHandler(nodename) + err := c.syncNodeConfigHandler(context.TODO(), nodename) if !expectError && err != nil { f.t.Errorf("error syncing node configs: %v", err) } else if expectError && err == nil { @@ -415,7 +416,7 @@ func TestKubeletConfigCreate(t *testing.T) { mcp := helpers.NewMachineConfigPool("master", nil, helpers.MasterSelector, "v0") mcp2 := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") kc1 := newKubeletConfig("smaller-max-pods", 
&kubeletconfigv1beta1.KubeletConfiguration{MaxPods: 100}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, "pools.operator.machineconfiguration.openshift.io/master", "")) - kubeletConfigKey, _ := getManagedKubeletConfigKey(mcp, f.client, kc1) + kubeletConfigKey, _ := getManagedKubeletConfigKey(context.TODO(), mcp, f.client, kc1) mcs := helpers.NewMachineConfig(kubeletConfigKey, map[string]string{"node-role/master": ""}, "dummy://", []ign3types.File{{}}) mcsDeprecated := mcs.DeepCopy() mcsDeprecated.Name = getManagedKubeletConfigKeyDeprecated(mcp) @@ -507,7 +508,7 @@ func TestKubeletConfigAutoSizingReserved(t *testing.T) { }, Status: mcfgv1.KubeletConfigStatus{}, } - kubeletConfigKey, _ := getManagedKubeletConfigKey(mcp, f.client, kc1) + kubeletConfigKey, _ := getManagedKubeletConfigKey(context.TODO(), mcp, f.client, kc1) mcs := helpers.NewMachineConfig(kubeletConfigKey, map[string]string{"node-role/master": ""}, "dummy://", []ign3types.File{{}}) mcsDeprecated := mcs.DeepCopy() mcsDeprecated.Name = getManagedKubeletConfigKeyDeprecated(mcp) @@ -549,7 +550,7 @@ func TestKubeletConfigLogFile(t *testing.T) { }, Status: mcfgv1.KubeletConfigStatus{}, } - kubeletConfigKey, _ := getManagedKubeletConfigKey(mcp, f.client, kc1) + kubeletConfigKey, _ := getManagedKubeletConfigKey(context.TODO(), mcp, f.client, kc1) mcs := helpers.NewMachineConfig(kubeletConfigKey, map[string]string{"node-role/master": ""}, "dummy://", []ign3types.File{{}}) mcsDeprecated := mcs.DeepCopy() mcsDeprecated.Name = getManagedKubeletConfigKeyDeprecated(mcp) @@ -583,7 +584,7 @@ func TestKubeletConfigUpdates(t *testing.T) { mcp := helpers.NewMachineConfigPool("master", nil, helpers.MasterSelector, "v0") mcp2 := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") kc1 := newKubeletConfig("smaller-max-pods", &kubeletconfigv1beta1.KubeletConfiguration{MaxPods: 100}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, "pools.operator.machineconfiguration.openshift.io/master", "")) - 
kubeletConfigKey, _ := getManagedKubeletConfigKey(mcp, f.client, kc1) + kubeletConfigKey, _ := getManagedKubeletConfigKey(context.TODO(), mcp, f.client, kc1) mcs := helpers.NewMachineConfig(kubeletConfigKey, map[string]string{"node-role/master": ""}, "dummy://", []ign3types.File{{}}) mcsDeprecated := mcs.DeepCopy() mcsDeprecated.Name = getManagedKubeletConfigKeyDeprecated(mcp) @@ -605,7 +606,7 @@ func TestKubeletConfigUpdates(t *testing.T) { c := f.newController() stopCh := make(chan struct{}) - err := c.syncHandler(getKey(kc1, t)) + err := c.syncHandler(context.TODO(), getKey(kc1, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } @@ -641,7 +642,7 @@ func TestKubeletConfigUpdates(t *testing.T) { glog.Info("Applying update") // Apply update - err = c.syncHandler(getKey(kcUpdate, t)) + err = c.syncHandler(context.TODO(), getKey(kcUpdate, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } @@ -739,7 +740,7 @@ func TestKubeletFeatureExists(t *testing.T) { mcp := helpers.NewMachineConfigPool("master", nil, helpers.MasterSelector, "v0") mcp2 := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") kc1 := newKubeletConfig("smaller-max-pods", &kubeletconfigv1beta1.KubeletConfiguration{MaxPods: 100}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, "pools.operator.machineconfiguration.openshift.io/master", "")) - kubeletConfigKey, _ := getManagedKubeletConfigKey(mcp, f.client, kc1) + kubeletConfigKey, _ := getManagedKubeletConfigKey(context.TODO(), mcp, f.client, kc1) mcs := helpers.NewMachineConfig(kubeletConfigKey, map[string]string{"node-role/master": ""}, "dummy://", []ign3types.File{{}}) mcsDeprecated := mcs.DeepCopy() mcsDeprecated.Name = getManagedKubeletConfigKeyDeprecated(mcp) @@ -884,7 +885,7 @@ func TestKubeletConfigResync(t *testing.T) { kc1 := newKubeletConfig("smaller-max-pods", &kubeletconfigv1beta1.KubeletConfiguration{MaxPods: 100}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, 
"pools.operator.machineconfiguration.openshift.io/master", "")) kc2 := newKubeletConfig("smaller-max-pods-2", &kubeletconfigv1beta1.KubeletConfiguration{MaxPods: 200}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, "pools.operator.machineconfiguration.openshift.io/master", "")) - kubeletConfigKey, _ := getManagedKubeletConfigKey(mcp, f.client, kc1) + kubeletConfigKey, _ := getManagedKubeletConfigKey(context.TODO(), mcp, f.client, kc1) mcs := helpers.NewMachineConfig(kubeletConfigKey, map[string]string{"node-role/master": ""}, "dummy://", []ign3types.File{{}}) mcsDeprecated := mcs.DeepCopy() mcsDeprecated.Name = getManagedKubeletConfigKeyDeprecated(mcp) @@ -896,7 +897,7 @@ func TestKubeletConfigResync(t *testing.T) { f.objects = append(f.objects, kc1) c := f.newController() - err := c.syncHandler(getKey(kc1, t)) + err := c.syncHandler(context.TODO(), getKey(kc1, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } @@ -905,7 +906,7 @@ func TestKubeletConfigResync(t *testing.T) { f.objects = append(f.objects, kc2) c = f.newController() - err = c.syncHandler(getKey(kc2, t)) + err = c.syncHandler(context.TODO(), getKey(kc2, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } @@ -918,7 +919,7 @@ func TestKubeletConfigResync(t *testing.T) { // resync kc1 and kc2 c = f.newController() - err = c.syncHandler(getKey(kc1, t)) + err = c.syncHandler(context.TODO(), getKey(kc1, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } @@ -926,7 +927,7 @@ func TestKubeletConfigResync(t *testing.T) { require.Equal(t, "", val) c = f.newController() - err = c.syncHandler(getKey(kc2, t)) + err = c.syncHandler(context.TODO(), getKey(kc2, t)) if err != nil { t.Errorf("syncHandler returned: %v", err) } diff --git a/pkg/controller/kubelet-config/kubelet_config_features.go b/pkg/controller/kubelet-config/kubelet_config_features.go index 371924daa3..d90e9c485e 100644 --- a/pkg/controller/kubelet-config/kubelet_config_features.go +++ 
b/pkg/controller/kubelet-config/kubelet_config_features.go @@ -29,24 +29,24 @@ var ( } ) -func (ctrl *Controller) featureWorker() { - for ctrl.processNextFeatureWorkItem() { +func (ctrl *Controller) featureWorker(ctx context.Context) { + for ctrl.processNextFeatureWorkItem(ctx) { } } -func (ctrl *Controller) processNextFeatureWorkItem() bool { +func (ctrl *Controller) processNextFeatureWorkItem(ctx context.Context) bool { key, quit := ctrl.featureQueue.Get() if quit { return false } defer ctrl.featureQueue.Done(key) - err := ctrl.syncFeatureHandler(key.(string)) + err := ctrl.syncFeatureHandler(ctx, key.(string)) ctrl.handleFeatureErr(err, key) return true } -func (ctrl *Controller) syncFeatureHandler(key string) error { +func (ctrl *Controller) syncFeatureHandler(ctx context.Context, key string) error { startTime := time.Now() glog.V(4).Infof("Started syncing feature handler %q (%v)", key, startTime) defer func() { @@ -93,11 +93,11 @@ func (ctrl *Controller) syncFeatureHandler(key string) error { } } // Get MachineConfig - managedKey, err := getManagedFeaturesKey(pool, ctrl.client) + managedKey, err := getManagedFeaturesKey(ctx, pool, ctrl.client) if err != nil { return err } - mc, err := ctrl.client.MachineconfigurationV1().MachineConfigs().Get(context.TODO(), managedKey, metav1.GetOptions{}) + mc, err := ctrl.client.MachineconfigurationV1().MachineConfigs().Get(ctx, managedKey, metav1.GetOptions{}) if err != nil && !errors.IsNotFound(err) { return err } @@ -126,9 +126,9 @@ func (ctrl *Controller) syncFeatureHandler(key string) error { if err := retry.RetryOnConflict(updateBackoff, func() error { var err error if isNotFound { - _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Create(context.TODO(), mc, metav1.CreateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Create(ctx, mc, metav1.CreateOptions{}) } else { - _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Update(context.TODO(), mc, 
metav1.UpdateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Update(ctx, mc, metav1.UpdateOptions{}) } return err }); err != nil { @@ -250,7 +250,7 @@ func generateKubeConfigIgnFromFeatures(cc *mcfgv1.ControllerConfig, templatesDir return rawCfgIgn, nil } -func RunFeatureGateBootstrap(templateDir string, features *osev1.FeatureGate, nodeConfig *osev1.Node, controllerConfig *mcfgv1.ControllerConfig, mcpPools []*mcfgv1.MachineConfigPool) ([]*mcfgv1.MachineConfig, error) { +func RunFeatureGateBootstrap(ctx context.Context, templateDir string, features *osev1.FeatureGate, nodeConfig *osev1.Node, controllerConfig *mcfgv1.ControllerConfig, mcpPools []*mcfgv1.MachineConfigPool) ([]*mcfgv1.MachineConfig, error) { machineConfigs := []*mcfgv1.MachineConfig{} for _, pool := range mcpPools { @@ -271,7 +271,7 @@ func RunFeatureGateBootstrap(templateDir string, features *osev1.FeatureGate, no } // Get MachineConfig - managedKey, err := getManagedFeaturesKey(pool, nil) + managedKey, err := getManagedFeaturesKey(ctx, pool, nil) if err != nil { return nil, err } diff --git a/pkg/controller/kubelet-config/kubelet_config_features_test.go b/pkg/controller/kubelet-config/kubelet_config_features_test.go index 90f7901775..e3f7790cc2 100644 --- a/pkg/controller/kubelet-config/kubelet_config_features_test.go +++ b/pkg/controller/kubelet-config/kubelet_config_features_test.go @@ -1,6 +1,7 @@ package kubeletconfig import ( + "context" "reflect" "testing" @@ -54,9 +55,9 @@ func TestFeaturesDefault(t *testing.T) { mcp2 := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") kc1 := newKubeletConfig("smaller-max-pods", &kubeletconfigv1beta1.KubeletConfiguration{MaxPods: 100}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, "pools.operator.machineconfiguration.openshift.io/master", "")) kc2 := newKubeletConfig("bigger-max-pods", &kubeletconfigv1beta1.KubeletConfiguration{MaxPods: 250}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, 
"pools.operator.machineconfiguration.openshift.io/master", "")) - kubeletConfigKey1, err := getManagedKubeletConfigKey(mcp, f.client, kc1) + kubeletConfigKey1, err := getManagedKubeletConfigKey(context.TODO(), mcp, f.client, kc1) require.NoError(t, err) - kubeletConfigKey2, err := getManagedKubeletConfigKey(mcp2, f.client, kc2) + kubeletConfigKey2, err := getManagedKubeletConfigKey(context.TODO(), mcp2, f.client, kc2) require.NoError(t, err) mcs := helpers.NewMachineConfig(kubeletConfigKey1, map[string]string{"node-role/master": ""}, "dummy://", []ign3types.File{{}}) mcs2 := helpers.NewMachineConfig(kubeletConfigKey2, map[string]string{"node-role/worker": ""}, "dummy://", []ign3types.File{{}}) @@ -95,9 +96,9 @@ func TestFeaturesCustomNoUpgrade(t *testing.T) { mcp2 := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") kc1 := newKubeletConfig("smaller-max-pods", &kubeletconfigv1beta1.KubeletConfiguration{MaxPods: 100}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, "pools.operator.machineconfiguration.openshift.io/master", "")) kc2 := newKubeletConfig("bigger-max-pods", &kubeletconfigv1beta1.KubeletConfiguration{MaxPods: 250}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, "pools.operator.machineconfiguration.openshift.io/master", "")) - kubeletConfigKey1, err := getManagedKubeletConfigKey(mcp, f.client, kc1) + kubeletConfigKey1, err := getManagedKubeletConfigKey(context.TODO(), mcp, f.client, kc1) require.NoError(t, err) - kubeletConfigKey2, err := getManagedKubeletConfigKey(mcp2, f.client, kc2) + kubeletConfigKey2, err := getManagedKubeletConfigKey(context.TODO(), mcp2, f.client, kc2) require.NoError(t, err) mcs := helpers.NewMachineConfig(kubeletConfigKey1, map[string]string{"node-role/master": ""}, "dummy://", []ign3types.File{{}}) mcs2 := helpers.NewMachineConfig(kubeletConfigKey2, map[string]string{"node-role/worker": ""}, "dummy://", []ign3types.File{{}}) @@ -150,7 +151,7 @@ func TestBootstrapFeaturesDefault(t *testing.T) { 
features := createNewDefaultFeatureGate() - mcs, err := RunFeatureGateBootstrap("../../../templates", features, nil, cc, mcps) + mcs, err := RunFeatureGateBootstrap(context.TODO(), "../../../templates", features, nil, cc, mcps) if err != nil { t.Errorf("could not run feature gate bootstrap: %v", err) } @@ -184,7 +185,7 @@ func TestBootstrapFeaturesCustomNoUpgrade(t *testing.T) { }, } - mcs, err := RunFeatureGateBootstrap("../../../templates", features, nil, cc, mcps) + mcs, err := RunFeatureGateBootstrap(context.TODO(), "../../../templates", features, nil, cc, mcps) if err != nil { t.Errorf("could not run feature gate bootstrap: %v", err) } diff --git a/pkg/controller/kubelet-config/kubelet_config_nodes.go b/pkg/controller/kubelet-config/kubelet_config_nodes.go index fb838bdb07..f389438338 100644 --- a/pkg/controller/kubelet-config/kubelet_config_nodes.go +++ b/pkg/controller/kubelet-config/kubelet_config_nodes.go @@ -21,19 +21,19 @@ import ( "k8s.io/client-go/util/retry" ) -func (ctrl *Controller) nodeConfigWorker() { - for ctrl.processNextNodeConfigWorkItem() { +func (ctrl *Controller) nodeConfigWorker(ctx context.Context) { + for ctrl.processNextNodeConfigWorkItem(ctx) { } } -func (ctrl *Controller) processNextNodeConfigWorkItem() bool { +func (ctrl *Controller) processNextNodeConfigWorkItem(ctx context.Context) bool { key, quit := ctrl.nodeConfigQueue.Get() if quit { return false } defer ctrl.nodeConfigQueue.Done(key) - err := ctrl.syncNodeConfigHandler(key.(string)) + err := ctrl.syncNodeConfigHandler(ctx, key.(string)) ctrl.handleNodeConfigErr(err, key) return true } @@ -59,7 +59,7 @@ func (ctrl *Controller) handleNodeConfigErr(err error, key interface{}) { // syncNodeConfigHandler syncs whenever there is a change on the nodes.config.openshift.io resource // nodes.config.openshift.io object holds the cluster-wide information about the // node specific features such as cgroup modes, workerlatencyprofiles, etc. 
-func (ctrl *Controller) syncNodeConfigHandler(key string) error { +func (ctrl *Controller) syncNodeConfigHandler(ctx context.Context, key string) error { startTime := time.Now() glog.V(4).Infof("Started syncing nodeConfig handler %q (%v)", key, startTime) defer func() { @@ -102,11 +102,11 @@ func (ctrl *Controller) syncNodeConfigHandler(key string) error { continue } // Get MachineConfig - key, err := getManagedNodeConfigKey(pool, ctrl.client) + key, err := getManagedNodeConfigKey(ctx, pool, ctrl.client) if err != nil { return err } - mc, err := ctrl.client.MachineconfigurationV1().MachineConfigs().Get(context.TODO(), key, metav1.GetOptions{}) + mc, err := ctrl.client.MachineconfigurationV1().MachineConfigs().Get(ctx, key, metav1.GetOptions{}) isNotFound := errors.IsNotFound(err) if err != nil && !isNotFound { return err @@ -151,9 +151,9 @@ func (ctrl *Controller) syncNodeConfigHandler(key string) error { if err := retry.RetryOnConflict(updateBackoff, func() error { var err error if isNotFound { - _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Create(context.TODO(), mc, metav1.CreateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Create(ctx, mc, metav1.CreateOptions{}) } else { - _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Update(context.TODO(), mc, metav1.UpdateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Update(ctx, mc, metav1.UpdateOptions{}) } return err }); err != nil { @@ -168,7 +168,7 @@ func (ctrl *Controller) syncNodeConfigHandler(key string) error { } for _, kc := range kcs { // updating the existing kubeletconfigs with the updated nodeconfig - err := ctrl.syncKubeletConfig(kc.Name) + err := ctrl.syncKubeletConfig(ctx, kc.Name) if err != nil { return fmt.Errorf("could not update KubeletConfig %v, err: %w", kc, err) } @@ -262,7 +262,7 @@ func (ctrl *Controller) deleteNodeConfig(obj interface{}) { glog.V(4).Infof("Deleted node config %s and restored default 
config", nodeConfig.Name) } -func RunNodeConfigBootstrap(templateDir string, features *osev1.FeatureGate, cconfig *mcfgv1.ControllerConfig, nodeConfig *osev1.Node, mcpPools []*mcfgv1.MachineConfigPool) ([]*mcfgv1.MachineConfig, error) { +func RunNodeConfigBootstrap(ctx context.Context, templateDir string, features *osev1.FeatureGate, cconfig *mcfgv1.ControllerConfig, nodeConfig *osev1.Node, mcpPools []*mcfgv1.MachineConfigPool) ([]*mcfgv1.MachineConfig, error) { configs := []*mcfgv1.MachineConfig{} for _, pool := range mcpPools { @@ -271,7 +271,7 @@ func RunNodeConfigBootstrap(templateDir string, features *osev1.FeatureGate, cco continue } // Get MachineConfig - key, err := getManagedNodeConfigKey(pool, nil) + key, err := getManagedNodeConfigKey(ctx, pool, nil) if err != nil { return nil, err } diff --git a/pkg/controller/kubelet-config/kubelet_config_nodes_test.go b/pkg/controller/kubelet-config/kubelet_config_nodes_test.go index b608d02d64..08d8417abe 100644 --- a/pkg/controller/kubelet-config/kubelet_config_nodes_test.go +++ b/pkg/controller/kubelet-config/kubelet_config_nodes_test.go @@ -1,6 +1,7 @@ package kubeletconfig import ( + "context" "fmt" "reflect" "testing" @@ -50,7 +51,7 @@ func TestNodeConfigDefault(t *testing.T) { cc := newControllerConfig(ctrlcommon.ControllerConfigName, platform) mcp := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") kc := newKubeletConfig("smaller-max-pods", &kubeletconfigv1beta1.KubeletConfiguration{MaxPods: 100}, metav1.AddLabelToSelector(&metav1.LabelSelector{}, "pools.operator.machineconfiguration.openshift.io/worker", "")) - kubeletConfigKey, err := getManagedKubeletConfigKey(mcp, f.client, kc) + kubeletConfigKey, err := getManagedKubeletConfigKey(context.TODO(), mcp, f.client, kc) require.NoError(t, err) mcs := helpers.NewMachineConfig(kubeletConfigKey, map[string]string{"node-role/worker": ""}, "dummy://", []ign3types.File{{}}) mcsDeprecated := mcs.DeepCopy() @@ -84,7 +85,7 @@ func 
TestBootstrapNodeConfigDefault(t *testing.T) { features := createNewDefaultFeatureGate() configNode := createNewDefaultNodeconfig() - mcs, err := RunNodeConfigBootstrap("../../../templates", features, cc, configNode, mcps) + mcs, err := RunNodeConfigBootstrap(context.TODO(), "../../../templates", features, cc, configNode, mcps) if err != nil { t.Errorf("could not run node config bootstrap: %v", err) } @@ -103,7 +104,7 @@ func TestBootstrapNoNodeConfig(t *testing.T) { mcp := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v0") mcps := []*mcfgv1.MachineConfigPool{mcp} - mcs, err := RunNodeConfigBootstrap("../../../templates", nil, cc, nil, mcps) + mcs, err := RunNodeConfigBootstrap(context.TODO(), "../../../templates", nil, cc, nil, mcps) if err == nil { t.Errorf("expected an error while generating the kubelet config with no node config") } From 0ba4c845bc7b1cc15f086029560d492050db4d50 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:20:52 -0500 Subject: [PATCH 11/18] Context cancellation for node controller This adds context cancellation to the functions in the node controller. This favors explicitly passing the context as the first parameter so it is apparent that the function is cancellable. This also adds a private context to the controller struct for cases where the function signature could not change due to interface constraints (like Informer event handlers), but that usage makes it less clear what is cancellable and what isn't, so I tried to avoid it unless absolutely necessary. 
--- pkg/controller/node/node_controller.go | 100 ++++++++++++-------- pkg/controller/node/node_controller_test.go | 5 +- pkg/controller/node/status.go | 4 +- 3 files changed, 63 insertions(+), 46 deletions(-) diff --git a/pkg/controller/node/node_controller.go b/pkg/controller/node/node_controller.go index f7337635b7..283ef8b475 100644 --- a/pkg/controller/node/node_controller.go +++ b/pkg/controller/node/node_controller.go @@ -79,11 +79,13 @@ var kubeAPIToKubeletSignerNamePrefixes = []string{"openshift-kube-apiserver-oper // Controller defines the node controller. type Controller struct { + name string + client mcfgclientset.Interface kubeClient clientset.Interface eventRecorder record.EventRecorder - syncHandler func(mcp string) error + syncHandler func(ctx context.Context, mcp string) error enqueueMachineConfigPool func(*mcfgv1.MachineConfigPool) ccLister mcfglistersv1.ControllerConfigLister @@ -100,6 +102,10 @@ type Controller struct { schedulerListerSynced cache.InformerSynced queue workqueue.RateLimitingInterface + + // This is set to TODO() and gets assigned to whatever gets passed to the Run() function. Prefer passing the context when possible, but there are some functions (like event handlers) that can't + // receive a context, so we can have them fish it out of the controller. + ctx context.Context } // New returns a new node controller. @@ -117,6 +123,7 @@ func New( eventBroadcaster.StartRecordingToSink(&coreclientsetv1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) ctrl := &Controller{ + name: "NodeController", client: mcfgClient, kubeClient: kubeClient, eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "machineconfigcontroller-nodecontroller"}), @@ -157,11 +164,14 @@ func New( } // Run executes the render controller. 
-func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) { +func (ctrl *Controller) Run(ctx context.Context, workers int) { defer utilruntime.HandleCrash() defer ctrl.queue.ShutDown() - if !cache.WaitForCacheSync(stopCh, ctrl.ccListerSynced, ctrl.mcListerSynced, ctrl.mcpListerSynced, ctrl.nodeListerSynced, ctrl.schedulerListerSynced) { + // Assign this here so our event handlers can use it + ctrl.ctx = ctx + + if !cache.WaitForCacheSync(ctx.Done(), ctrl.ccListerSynced, ctrl.mcListerSynced, ctrl.mcpListerSynced, ctrl.nodeListerSynced, ctrl.schedulerListerSynced) { return } @@ -169,10 +179,10 @@ func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) { defer glog.Info("Shutting down MachineConfigController-NodeController") for i := 0; i < workers; i++ { - go wait.Until(ctrl.worker, time.Second, stopCh) + go wait.UntilWithContext(ctx, ctrl.worker, time.Second) } - <-stopCh + <-ctx.Done() } func (ctrl *Controller) getCurrentMasters() ([]*corev1.Node, error) { @@ -186,7 +196,7 @@ func (ctrl *Controller) getCurrentMasters() ([]*corev1.Node, error) { // checkMasterNodesOnAdd makes the master nodes schedulable/unschedulable whenever scheduler config CR with name // cluster is created func (ctrl *Controller) checkMasterNodesOnAdd(obj interface{}) { - ctrl.reconcileMasters() + ctrl.reconcileMasters(ctrl.ctx) } // checkMasterNodesOnDelete makes the master nodes schedulable/unschedulable whenever scheduler config CR with name @@ -204,7 +214,9 @@ func (ctrl *Controller) checkMasterNodesOnDelete(obj interface{}) { return } // On deletion make all masters unschedulable to restore default behaviour - errs := ctrl.makeMastersUnSchedulable(currentMasters) + // TODO(jkyros): can't change the event handler interface, need to get the context from somewhere + // since we're doing work here. 
+ errs := ctrl.makeMastersUnSchedulable(ctrl.ctx, currentMasters) if len(errs) > 0 { err = v1helpers.NewMultiLineAggregate(errs) err = fmt.Errorf("reconciling to make nodes schedulable/unschedulable failed: %w", err) @@ -230,14 +242,14 @@ func (ctrl *Controller) checkMasterNodesOnUpdate(old, cur interface{}) { return } - ctrl.reconcileMasters() + ctrl.reconcileMasters(ctrl.ctx) } // makeMastersUnSchedulable makes all the masters in the cluster unschedulable -func (ctrl *Controller) makeMastersUnSchedulable(currentMasters []*corev1.Node) []error { +func (ctrl *Controller) makeMastersUnSchedulable(ctx context.Context, currentMasters []*corev1.Node) []error { var errs []error for _, node := range currentMasters { - if err := ctrl.makeMasterNodeUnSchedulable(node); err != nil { + if err := ctrl.makeMasterNodeUnSchedulable(ctx, node); err != nil { errs = append(errs, fmt.Errorf("failed making node %v schedulable with error %w", node.Name, err)) } } @@ -247,8 +259,8 @@ func (ctrl *Controller) makeMastersUnSchedulable(currentMasters []*corev1.Node) // makeMasterNodeUnSchedulable makes master node unschedulable by removing worker label and adding `NoSchedule` // master taint to the master node -func (ctrl *Controller) makeMasterNodeUnSchedulable(node *corev1.Node) error { - _, err := internal.UpdateNodeRetry(ctrl.kubeClient.CoreV1().Nodes(), ctrl.nodeLister, node.Name, func(node *corev1.Node) { +func (ctrl *Controller) makeMasterNodeUnSchedulable(ctx context.Context, node *corev1.Node) error { + _, err := internal.UpdateNodeRetry(ctx, ctrl.kubeClient.CoreV1().Nodes(), ctrl.nodeLister, node.Name, func(node *corev1.Node) { // Remove worker label newLabels := node.Labels if _, hasWorkerLabel := newLabels[WorkerLabel]; hasWorkerLabel { @@ -277,8 +289,8 @@ func (ctrl *Controller) makeMasterNodeUnSchedulable(node *corev1.Node) error { // makeMasterNodeSchedulable makes master node schedulable by removing NoSchedule master taint and // adding worker label -func (ctrl *Controller) 
makeMasterNodeSchedulable(node *corev1.Node) error { - _, err := internal.UpdateNodeRetry(ctrl.kubeClient.CoreV1().Nodes(), ctrl.nodeLister, node.Name, func(node *corev1.Node) { +func (ctrl *Controller) makeMasterNodeSchedulable(ctx context.Context, node *corev1.Node) error { + _, err := internal.UpdateNodeRetry(ctx, ctrl.kubeClient.CoreV1().Nodes(), ctrl.nodeLister, node.Name, func(node *corev1.Node) { // Add worker label newLabels := node.Labels if _, hasWorkerLabel := newLabels[WorkerLabel]; !hasWorkerLabel { @@ -368,7 +380,7 @@ func isWindows(node *corev1.Node) bool { } // Given a master Node, ensure it reflects the current mastersSchedulable setting -func (ctrl *Controller) reconcileMaster(node *corev1.Node) { +func (ctrl *Controller) reconcileMaster(ctx context.Context, node *corev1.Node) { mastersSchedulable, err := ctrl.getMastersSchedulable() if err != nil { err = fmt.Errorf("getting scheduler config failed: %w", err) @@ -376,14 +388,14 @@ func (ctrl *Controller) reconcileMaster(node *corev1.Node) { return } if mastersSchedulable { - err = ctrl.makeMasterNodeSchedulable(node) + err = ctrl.makeMasterNodeSchedulable(ctx, node) if err != nil { err = fmt.Errorf("failed making master Node schedulable: %w", err) glog.Error(err) return } } else if !mastersSchedulable { - err = ctrl.makeMasterNodeUnSchedulable(node) + err = ctrl.makeMasterNodeUnSchedulable(ctx, node) if err != nil { err = fmt.Errorf("failed making master Node unschedulable: %w", err) glog.Error(err) @@ -394,7 +406,7 @@ func (ctrl *Controller) reconcileMaster(node *corev1.Node) { // Get a list of current masters and apply scheduler config to them // TODO: Taint reconciliation should happen elsewhere, in a generic taint/label reconciler -func (ctrl *Controller) reconcileMasters() { +func (ctrl *Controller) reconcileMasters(ctx context.Context) { currentMasters, err := ctrl.getCurrentMasters() if err != nil { err = fmt.Errorf("reconciling to make master nodes schedulable/unschedulable failed: %w", 
err) @@ -402,7 +414,7 @@ func (ctrl *Controller) reconcileMasters() { return } for _, node := range currentMasters { - ctrl.reconcileMaster(node) + ctrl.reconcileMaster(ctx, node) } } @@ -414,7 +426,7 @@ func (ctrl *Controller) addNode(obj interface{}) { } if ctrl.isMaster(node) { - ctrl.reconcileMaster(node) + ctrl.reconcileMaster(ctrl.ctx, node) } pools, err := ctrl.getPoolsForNode(node) @@ -455,7 +467,7 @@ func (ctrl *Controller) updateNode(old, cur interface{}) { } if ctrl.isMaster(curNode) { - ctrl.reconcileMaster(curNode) + ctrl.reconcileMaster(ctrl.ctx, curNode) } pool, err := ctrl.getPrimaryPoolForNode(curNode) @@ -693,19 +705,19 @@ func (ctrl *Controller) enqueueDefault(pool *mcfgv1.MachineConfigPool) { // worker runs a worker thread that just dequeues items, processes them, and marks them done. // It enforces that the syncHandler is never invoked concurrently with the same key. -func (ctrl *Controller) worker() { - for ctrl.processNextWorkItem() { +func (ctrl *Controller) worker(ctx context.Context) { + for ctrl.processNextWorkItem(ctx) { } } -func (ctrl *Controller) processNextWorkItem() bool { +func (ctrl *Controller) processNextWorkItem(ctx context.Context) bool { key, quit := ctrl.queue.Get() if quit { return false } defer ctrl.queue.Done(key) - err := ctrl.syncHandler(key.(string)) + err := ctrl.syncHandler(ctx, key.(string)) ctrl.handleErr(err, key) return true @@ -731,7 +743,7 @@ func (ctrl *Controller) handleErr(err error, key interface{}) { // syncMachineConfigPool will sync the machineconfig pool with the given key. // This function is not meant to be invoked concurrently with the same key. 
-func (ctrl *Controller) syncMachineConfigPool(key string) error { +func (ctrl *Controller) syncMachineConfigPool(ctx context.Context, key string) error { startTime := time.Now() glog.V(4).Infof("Started syncing machineconfigpool %q (%v)", key, startTime) defer func() { @@ -771,7 +783,7 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error { } if pool.DeletionTimestamp != nil { - return ctrl.syncStatusOnly(pool) + return ctrl.syncStatusOnly(ctx, pool) } if pool.Spec.Paused { @@ -783,7 +795,7 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error { if pool.Spec.Configuration.Name != pool.Status.Configuration.Name { ctrl.setPendingFileMetrics(pool) } - return ctrl.syncStatusOnly(pool) + return ctrl.syncStatusOnly(ctx, pool) } // We aren't paused anymore, so reset the metrics @@ -791,7 +803,7 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error { nodes, err := ctrl.getNodesForPool(pool) if err != nil { - if syncErr := ctrl.syncStatusOnly(pool); syncErr != nil { + if syncErr := ctrl.syncStatusOnly(ctx, pool); syncErr != nil { errs := kubeErrs.NewAggregate([]error{syncErr, err}) return fmt.Errorf("error getting nodes for pool %q, sync error: %w", pool.Name, errs) } @@ -800,18 +812,17 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error { maxunavail, err := maxUnavailable(pool, nodes) if err != nil { - if syncErr := ctrl.syncStatusOnly(pool); syncErr != nil { + if syncErr := ctrl.syncStatusOnly(ctx, pool); syncErr != nil { errs := kubeErrs.NewAggregate([]error{syncErr, err}) return fmt.Errorf("error getting max unavailable count for pool %q, sync error: %w", pool.Name, errs) } return err } - if err := ctrl.setClusterConfigAnnotation(nodes); err != nil { + if err := ctrl.setClusterConfigAnnotation(ctx, nodes); err != nil { return fmt.Errorf("error setting clusterConfig Annotation for node in pool %q, error: %w", pool.Name, err) } // Taint all the nodes in the node pool, irrespective of their upgrade status. 
- ctx := context.TODO() for _, node := range nodes { // All the nodes that need to be upgraded should have `NodeUpdateInProgressTaint` so that they're less likely // to be chosen during the scheduling cycle. @@ -843,15 +854,15 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error { } } ctrl.logPool(pool, "%d candidate nodes in %d zones for update, capacity: %d", len(candidates), len(zones), capacity) - if err := ctrl.updateCandidateMachines(pool, candidates, capacity); err != nil { - if syncErr := ctrl.syncStatusOnly(pool); syncErr != nil { + if err := ctrl.updateCandidateMachines(ctx, pool, candidates, capacity); err != nil { + if syncErr := ctrl.syncStatusOnly(ctx, pool); syncErr != nil { errs := kubeErrs.NewAggregate([]error{syncErr, err}) return fmt.Errorf("error setting desired machine config annotation for pool %q, sync error: %w", pool.Name, errs) } return err } } - return ctrl.syncStatusOnly(pool) + return ctrl.syncStatusOnly(ctx, pool) } // checkIfNodeHasInProgressTaint checks if the given node has in progress taint @@ -896,7 +907,7 @@ func (ctrl *Controller) getNodesForPool(pool *mcfgv1.MachineConfigPool) ([]*core // setClusterConfigAnnotation reads cluster configs set into controllerConfig // and add/updates required annotation to node such as ControlPlaneTopology // from infrastructure object. 
-func (ctrl *Controller) setClusterConfigAnnotation(nodes []*corev1.Node) error { +func (ctrl *Controller) setClusterConfigAnnotation(ctx context.Context, nodes []*corev1.Node) error { cc, err := ctrl.ccLister.Get(ctrlcommon.ControllerConfigName) if err != nil { return err @@ -905,7 +916,7 @@ func (ctrl *Controller) setClusterConfigAnnotation(nodes []*corev1.Node) error { for _, node := range nodes { if node.Annotations[daemonconsts.ClusterControlPlaneTopologyAnnotationKey] != string(cc.Spec.Infra.Status.ControlPlaneTopology) { oldAnn := node.Annotations[daemonconsts.ClusterControlPlaneTopologyAnnotationKey] - _, err := internal.UpdateNodeRetry(ctrl.kubeClient.CoreV1().Nodes(), ctrl.nodeLister, node.Name, func(node *corev1.Node) { + _, err := internal.UpdateNodeRetry(ctx, ctrl.kubeClient.CoreV1().Nodes(), ctrl.nodeLister, node.Name, func(node *corev1.Node) { node.Annotations[daemonconsts.ClusterControlPlaneTopologyAnnotationKey] = string(cc.Spec.Infra.Status.ControlPlaneTopology) }) if err != nil { @@ -917,9 +928,9 @@ func (ctrl *Controller) setClusterConfigAnnotation(nodes []*corev1.Node) error { return nil } -func (ctrl *Controller) setDesiredMachineConfigAnnotation(nodeName, currentConfig string) error { +func (ctrl *Controller) setDesiredMachineConfigAnnotation(ctx context.Context, nodeName, currentConfig string) error { return clientretry.RetryOnConflict(constants.NodeUpdateBackoff, func() error { - oldNode, err := ctrl.kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + oldNode, err := ctrl.kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) if err != nil { return err } @@ -946,7 +957,7 @@ func (ctrl *Controller) setDesiredMachineConfigAnnotation(nodeName, currentConfi if err != nil { return fmt.Errorf("failed to create patch for node %q: %w", nodeName, err) } - _, err = ctrl.kubeClient.CoreV1().Nodes().Patch(context.TODO(), nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) + _, err = 
ctrl.kubeClient.CoreV1().Nodes().Patch(ctx, nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) return err }) } @@ -1022,7 +1033,7 @@ func (ctrl *Controller) filterControlPlaneCandidateNodes(pool *mcfgv1.MachineCon } // updateCandidateMachines sets the desiredConfig annotation the candidate machines -func (ctrl *Controller) updateCandidateMachines(pool *mcfgv1.MachineConfigPool, candidates []*corev1.Node, capacity uint) error { +func (ctrl *Controller) updateCandidateMachines(ctx context.Context, pool *mcfgv1.MachineConfigPool, candidates []*corev1.Node, capacity uint) error { if pool.Name == ctrlcommon.MachineConfigPoolMaster { var err error candidates, capacity, err = ctrl.filterControlPlaneCandidateNodes(pool, candidates, capacity) @@ -1043,7 +1054,7 @@ func (ctrl *Controller) updateCandidateMachines(pool *mcfgv1.MachineConfigPool, targetConfig := pool.Spec.Configuration.Name for _, node := range candidates { ctrl.logPool(pool, "Setting node %s target to %s", node.Name, targetConfig) - if err := ctrl.setDesiredMachineConfigAnnotation(node.Name, targetConfig); err != nil { + if err := ctrl.setDesiredMachineConfigAnnotation(ctx, node.Name, targetConfig); err != nil { return fmt.Errorf("setting desired config for node %s: %w", node.Name, err) } } @@ -1287,3 +1298,8 @@ func (ctrl *Controller) getNewestAPIToKubeletSignerCertificate(statusIgnConfig * return newestCertificate, nil } + +// Name() returns the name of this controller +func (ctrl *Controller) Name() string { + return ctrl.name +} diff --git a/pkg/controller/node/node_controller_test.go b/pkg/controller/node/node_controller_test.go index e8e57a2ed0..bf7f1f0081 100644 --- a/pkg/controller/node/node_controller_test.go +++ b/pkg/controller/node/node_controller_test.go @@ -1,6 +1,7 @@ package node import ( + "context" "encoding/json" "fmt" "reflect" @@ -125,7 +126,7 @@ func (f *fixture) runExpectError(pool string) { func (f *fixture) runController(pool string, expectError bool) { c := 
f.newController() - err := c.syncHandler(pool) + err := c.syncHandler(context.TODO(), pool) if !expectError && err != nil { f.t.Errorf("error syncing machineconfigpool: %v", err) } else if expectError && err == nil { @@ -778,7 +779,7 @@ func TestSetDesiredMachineConfigAnnotation(t *testing.T) { c := f.newController() - err := c.setDesiredMachineConfigAnnotation(test.node.Name, "v1") + err := c.setDesiredMachineConfigAnnotation(context.TODO(), test.node.Name, "v1") if !assert.Nil(t, err) { return } diff --git a/pkg/controller/node/status.go b/pkg/controller/node/status.go index c54e0f6e3b..662265476c 100644 --- a/pkg/controller/node/status.go +++ b/pkg/controller/node/status.go @@ -13,7 +13,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func (ctrl *Controller) syncStatusOnly(pool *mcfgv1.MachineConfigPool) error { +func (ctrl *Controller) syncStatusOnly(ctx context.Context, pool *mcfgv1.MachineConfigPool) error { nodes, err := ctrl.getNodesForPool(pool) if err != nil { return err @@ -26,7 +26,7 @@ func (ctrl *Controller) syncStatusOnly(pool *mcfgv1.MachineConfigPool) error { newPool := pool newPool.Status = newStatus - _, err = ctrl.client.MachineconfigurationV1().MachineConfigPools().UpdateStatus(context.TODO(), newPool, metav1.UpdateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigPools().UpdateStatus(ctx, newPool, metav1.UpdateOptions{}) if pool.Spec.Configuration.Name != newPool.Spec.Configuration.Name { ctrl.eventRecorder.Eventf(pool, corev1.EventTypeNormal, "Updating", "Pool %s now targeting %s", pool.Name, newPool.Spec.Configuration.Name) } From cc74f01aaa71f8eb130c97b23295a1420f49525d Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:23:52 -0500 Subject: [PATCH 12/18] Context cancellation for render controller This adds context cancellation to the functions in the render controller. 
This favors explicitly passing the context as the first parameter so it is apparent that the function is cancellable. --- pkg/controller/render/render_controller.go | 58 +++++++++++-------- .../render/render_controller_test.go | 3 +- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/pkg/controller/render/render_controller.go b/pkg/controller/render/render_controller.go index 4fa5d6dc50..a4f61f76df 100644 --- a/pkg/controller/render/render_controller.go +++ b/pkg/controller/render/render_controller.go @@ -52,10 +52,12 @@ var ( // Controller defines the render controller. type Controller struct { + name string + client mcfgclientset.Interface eventRecorder record.EventRecorder - syncHandler func(mcp string) error + syncHandler func(ctx context.Context, mcp string) error enqueueMachineConfigPool func(*mcfgv1.MachineConfigPool) mcpLister mcfglistersv1.MachineConfigPoolLister @@ -83,6 +85,7 @@ func New( eventBroadcaster.StartRecordingToSink(&corev1client.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) ctrl := &Controller{ + name: "RenderController", client: mcfgClient, eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "machineconfigcontroller-rendercontroller"}), queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machineconfigcontroller-rendercontroller"), @@ -113,11 +116,11 @@ func New( } // Run executes the render controller. 
-func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) { +func (ctrl *Controller) Run(ctx context.Context, workers int) { defer utilruntime.HandleCrash() defer ctrl.queue.ShutDown() - if !cache.WaitForCacheSync(stopCh, ctrl.mcpListerSynced, ctrl.mcListerSynced, ctrl.ccListerSynced) { + if !cache.WaitForCacheSync(ctx.Done(), ctrl.mcpListerSynced, ctrl.mcListerSynced, ctrl.ccListerSynced) { return } @@ -125,10 +128,10 @@ func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) { defer glog.Info("Shutting down MachineConfigController-RenderController") for i := 0; i < workers; i++ { - go wait.Until(ctrl.worker, time.Second, stopCh) + go wait.UntilWithContext(ctx, ctrl.worker, time.Second) } - <-stopCh + <-ctx.Done() } func (ctrl *Controller) addMachineConfigPool(obj interface{}) { @@ -349,19 +352,19 @@ func (ctrl *Controller) enqueueDefault(pool *mcfgv1.MachineConfigPool) { // worker runs a worker thread that just dequeues items, processes them, and marks them done. // It enforces that the syncHandler is never invoked concurrently with the same key. -func (ctrl *Controller) worker() { - for ctrl.processNextWorkItem() { +func (ctrl *Controller) worker(ctx context.Context) { + for ctrl.processNextWorkItem(ctx) { } } -func (ctrl *Controller) processNextWorkItem() bool { +func (ctrl *Controller) processNextWorkItem(ctx context.Context) bool { key, quit := ctrl.queue.Get() if quit { return false } defer ctrl.queue.Done(key) - err := ctrl.syncHandler(key.(string)) + err := ctrl.syncHandler(ctx, key.(string)) ctrl.handleErr(err, key) return true @@ -387,7 +390,7 @@ func (ctrl *Controller) handleErr(err error, key interface{}) { // syncMachineConfigPool will sync the machineconfig pool with the given key. // This function is not meant to be invoked concurrently with the same key. 
-func (ctrl *Controller) syncMachineConfigPool(key string) error { +func (ctrl *Controller) syncMachineConfigPool(ctx context.Context, key string) error { startTime := time.Now() glog.V(4).Infof("Started syncing machineconfigpool %q (%v)", key, startTime) defer func() { @@ -431,32 +434,32 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error { return err } if len(mcs) == 0 { - return ctrl.syncFailingStatus(pool, fmt.Errorf("no MachineConfigs found matching selector %v", selector)) + return ctrl.syncFailingStatus(ctx, pool, fmt.Errorf("no MachineConfigs found matching selector %v", selector)) } - if err := ctrl.syncGeneratedMachineConfig(pool, mcs); err != nil { - return ctrl.syncFailingStatus(pool, err) + if err := ctrl.syncGeneratedMachineConfig(ctx, pool, mcs); err != nil { + return ctrl.syncFailingStatus(ctx, pool, err) } - return ctrl.syncAvailableStatus(pool) + return ctrl.syncAvailableStatus(ctx, pool) } -func (ctrl *Controller) syncAvailableStatus(pool *mcfgv1.MachineConfigPool) error { +func (ctrl *Controller) syncAvailableStatus(ctx context.Context, pool *mcfgv1.MachineConfigPool) error { if mcfgv1.IsMachineConfigPoolConditionFalse(pool.Status.Conditions, mcfgv1.MachineConfigPoolRenderDegraded) { return nil } sdegraded := mcfgv1.NewMachineConfigPoolCondition(mcfgv1.MachineConfigPoolRenderDegraded, corev1.ConditionFalse, "", "") mcfgv1.SetMachineConfigPoolCondition(&pool.Status, *sdegraded) - if _, err := ctrl.client.MachineconfigurationV1().MachineConfigPools().UpdateStatus(context.TODO(), pool, metav1.UpdateOptions{}); err != nil { + if _, err := ctrl.client.MachineconfigurationV1().MachineConfigPools().UpdateStatus(ctx, pool, metav1.UpdateOptions{}); err != nil { return err } return nil } -func (ctrl *Controller) syncFailingStatus(pool *mcfgv1.MachineConfigPool, err error) error { +func (ctrl *Controller) syncFailingStatus(ctx context.Context, pool *mcfgv1.MachineConfigPool, err error) error { sdegraded := 
mcfgv1.NewMachineConfigPoolCondition(mcfgv1.MachineConfigPoolRenderDegraded, corev1.ConditionTrue, "", fmt.Sprintf("Failed to render configuration for pool %s: %v", pool.Name, err)) mcfgv1.SetMachineConfigPoolCondition(&pool.Status, *sdegraded) - if _, updateErr := ctrl.client.MachineconfigurationV1().MachineConfigPools().UpdateStatus(context.TODO(), pool, metav1.UpdateOptions{}); updateErr != nil { + if _, updateErr := ctrl.client.MachineconfigurationV1().MachineConfigPools().UpdateStatus(ctx, pool, metav1.UpdateOptions{}); updateErr != nil { glog.Errorf("Error updating MachineConfigPool %s: %v", pool.Name, updateErr) } return err @@ -466,13 +469,13 @@ func (ctrl *Controller) syncFailingStatus(pool *mcfgv1.MachineConfigPool, err er // see https://github.com/openshift/machine-config-operator/issues/301 // It will probably involve making sure we're only GCing a config after all nodes don't have it // in either desired or current config. -func (ctrl *Controller) garbageCollectRenderedConfigs(pool *mcfgv1.MachineConfigPool) error { +func (ctrl *Controller) garbageCollectRenderedConfigs(ctx context.Context, pool *mcfgv1.MachineConfigPool) error { // Temporarily until https://github.com/openshift/machine-config-operator/pull/318 // which depends on the strategy for https://github.com/openshift/machine-config-operator/issues/301 return nil } -func (ctrl *Controller) syncGeneratedMachineConfig(pool *mcfgv1.MachineConfigPool, configs []*mcfgv1.MachineConfig) error { +func (ctrl *Controller) syncGeneratedMachineConfig(ctx context.Context, pool *mcfgv1.MachineConfigPool, configs []*mcfgv1.MachineConfig) error { if len(configs) == 0 { return nil } @@ -494,7 +497,7 @@ func (ctrl *Controller) syncGeneratedMachineConfig(pool *mcfgv1.MachineConfigPoo _, err = ctrl.mcLister.Get(generated.Name) if apierrors.IsNotFound(err) { - _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Create(context.TODO(), generated, 
metav1.CreateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigs().Create(ctx, generated, metav1.CreateOptions{}) if err != nil { return err } @@ -510,23 +513,23 @@ func (ctrl *Controller) syncGeneratedMachineConfig(pool *mcfgv1.MachineConfigPoo newPool.Spec.Configuration.Source = source if pool.Spec.Configuration.Name == generated.Name { - _, _, err = mcoResourceApply.ApplyMachineConfig(ctrl.client.MachineconfigurationV1(), generated) + _, _, err = mcoResourceApply.ApplyMachineConfig(ctx, ctrl.client.MachineconfigurationV1(), generated) if err != nil { return err } - _, err = ctrl.client.MachineconfigurationV1().MachineConfigPools().Update(context.TODO(), newPool, metav1.UpdateOptions{}) + _, err = ctrl.client.MachineconfigurationV1().MachineConfigPools().Update(ctx, newPool, metav1.UpdateOptions{}) return err } newPool.Spec.Configuration.Name = generated.Name // TODO(walters) Use subresource or JSON patch, but the latter isn't supported by the unit test mocks - pool, err = ctrl.client.MachineconfigurationV1().MachineConfigPools().Update(context.TODO(), newPool, metav1.UpdateOptions{}) + pool, err = ctrl.client.MachineconfigurationV1().MachineConfigPools().Update(ctx, newPool, metav1.UpdateOptions{}) if err != nil { return err } glog.V(2).Infof("Pool %s: now targeting: %s", pool.Name, pool.Spec.Configuration.Name) - if err := ctrl.garbageCollectRenderedConfigs(pool); err != nil { + if err := ctrl.garbageCollectRenderedConfigs(ctx, pool); err != nil { return err } @@ -630,3 +633,8 @@ func getMachineConfigsForPool(pool *mcfgv1.MachineConfigPool, configs []*mcfgv1. 
} return out, nil } + +// Name() returns the name of this controller +func (ctrl *Controller) Name() string { + return ctrl.name +} diff --git a/pkg/controller/render/render_controller_test.go b/pkg/controller/render/render_controller_test.go index 6fa13b3a87..d7343e34ad 100644 --- a/pkg/controller/render/render_controller_test.go +++ b/pkg/controller/render/render_controller_test.go @@ -1,6 +1,7 @@ package render import ( + "context" "fmt" "reflect" "testing" @@ -105,7 +106,7 @@ func (f *fixture) runExpectError(mcpname string) { func (f *fixture) runController(mcpname string, expectError bool) { c := f.newController() - err := c.syncHandler(mcpname) + err := c.syncHandler(context.TODO(), mcpname) if !expectError && err != nil { f.t.Errorf("error syncing machineconfigpool: %v", err) } else if expectError && err == nil { From 01f0d7126cb3106de5052f470776809a270ff3a5 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:26:57 -0500 Subject: [PATCH 13/18] Context cancellation for template controller This adds context cancellation to the functions in the template controller. This favors explicitly passing the context as the first parameter so it is apparent that the function is cancellable. --- pkg/controller/template/status.go | 16 +++---- .../template/template_controller.go | 46 +++++++++++-------- .../template/template_controller_test.go | 3 +- 3 files changed, 37 insertions(+), 28 deletions(-) diff --git a/pkg/controller/template/status.go b/pkg/controller/template/status.go index 18f99bd32e..f4aa440caa 100644 --- a/pkg/controller/template/status.go +++ b/pkg/controller/template/status.go @@ -17,7 +17,7 @@ import ( // - sets `running` condition to `true`. // - reset the `available` condition to `false` when we are syncing to new generation. // - does not modify `failing` condition. 
-func (ctrl *Controller) syncRunningStatus(ctrlconfig *mcfgv1.ControllerConfig) error { +func (ctrl *Controller) syncRunningStatus(ctx context.Context, ctrlconfig *mcfgv1.ControllerConfig) error { updateFunc := func(cfg *mcfgv1.ControllerConfig) error { reason := fmt.Sprintf("syncing towards (%d) generation using controller version %s", cfg.GetGeneration(), version.Raw) rcond := mcfgv1.NewControllerConfigStatusCondition(mcfgv1.TemplateControllerRunning, corev1.ConditionTrue, "", reason) @@ -29,13 +29,13 @@ func (ctrl *Controller) syncRunningStatus(ctrlconfig *mcfgv1.ControllerConfig) e cfg.Status.ObservedGeneration = ctrlconfig.GetGeneration() return nil } - return updateControllerConfigStatus(ctrlconfig.GetName(), ctrl.ccLister.Get, ctrl.client.MachineconfigurationV1().ControllerConfigs(), updateFunc) + return updateControllerConfigStatus(ctx, ctrlconfig.GetName(), ctrl.ccLister.Get, ctrl.client.MachineconfigurationV1().ControllerConfigs(), updateFunc) } // - resets `running` condition to `false` // - resets `completed` condition to `false` // - sets the `failing` condition to `true` using the `oerr` -func (ctrl *Controller) syncFailingStatus(ctrlconfig *mcfgv1.ControllerConfig, oerr error) error { +func (ctrl *Controller) syncFailingStatus(ctx context.Context, ctrlconfig *mcfgv1.ControllerConfig, oerr error) error { if oerr == nil { return nil } @@ -50,7 +50,7 @@ func (ctrl *Controller) syncFailingStatus(ctrlconfig *mcfgv1.ControllerConfig, o cfg.Status.ObservedGeneration = ctrlconfig.GetGeneration() return nil } - if err := updateControllerConfigStatus(ctrlconfig.GetName(), ctrl.ccLister.Get, ctrl.client.MachineconfigurationV1().ControllerConfigs(), updateFunc); err != nil { + if err := updateControllerConfigStatus(ctx, ctrlconfig.GetName(), ctrl.ccLister.Get, ctrl.client.MachineconfigurationV1().ControllerConfigs(), updateFunc); err != nil { return fmt.Errorf("failed to sync status for %v", oerr) } return oerr @@ -59,7 +59,7 @@ func (ctrl *Controller) 
syncFailingStatus(ctrlconfig *mcfgv1.ControllerConfig, o // - resets `running` condition to `false` // - resets `failing` condition to `false` // - sets the `completed` condition to `true` -func (ctrl *Controller) syncCompletedStatus(ctrlconfig *mcfgv1.ControllerConfig) error { +func (ctrl *Controller) syncCompletedStatus(ctx context.Context, ctrlconfig *mcfgv1.ControllerConfig) error { updateFunc := func(cfg *mcfgv1.ControllerConfig) error { reason := fmt.Sprintf("sync completed towards (%d) generation using controller version %s", cfg.GetGeneration(), version.Raw) acond := mcfgv1.NewControllerConfigStatusCondition(mcfgv1.TemplateControllerCompleted, corev1.ConditionTrue, "", reason) @@ -71,12 +71,12 @@ func (ctrl *Controller) syncCompletedStatus(ctrlconfig *mcfgv1.ControllerConfig) cfg.Status.ObservedGeneration = ctrlconfig.GetGeneration() return nil } - return updateControllerConfigStatus(ctrlconfig.GetName(), ctrl.ccLister.Get, ctrl.client.MachineconfigurationV1().ControllerConfigs(), updateFunc) + return updateControllerConfigStatus(ctx, ctrlconfig.GetName(), ctrl.ccLister.Get, ctrl.client.MachineconfigurationV1().ControllerConfigs(), updateFunc) } type updateControllerConfigStatusFunc func(*mcfgv1.ControllerConfig) error -func updateControllerConfigStatus(name string, +func updateControllerConfigStatus(ctx context.Context, name string, controllerConfigGetter func(name string) (*mcfgv1.ControllerConfig, error), client mcfgclientv1.ControllerConfigInterface, updateFuncs ...updateControllerConfigStatusFunc) error { @@ -95,7 +95,7 @@ func updateControllerConfigStatus(name string, if equality.Semantic.DeepEqual(old, new) { return nil } - _, err = client.UpdateStatus(context.TODO(), new, metav1.UpdateOptions{}) + _, err = client.UpdateStatus(ctx, new, metav1.UpdateOptions{}) return err }) } diff --git a/pkg/controller/template/template_controller.go b/pkg/controller/template/template_controller.go index a474778ceb..be779fd992 100644 --- 
a/pkg/controller/template/template_controller.go +++ b/pkg/controller/template/template_controller.go @@ -49,13 +49,15 @@ var controllerKind = mcfgv1.SchemeGroupVersion.WithKind("ControllerConfig") // Controller defines the template controller type Controller struct { + name string + templatesDir string client mcfgclientset.Interface kubeClient clientset.Interface eventRecorder record.EventRecorder - syncHandler func(ccKey string) error + syncHandler func(ctx context.Context, ccKey string) error enqueueControllerConfig func(*mcfgv1.ControllerConfig) ccLister mcfglistersv1.ControllerConfigLister @@ -85,6 +87,7 @@ func New( eventBroadcaster.StartRecordingToSink(&corev1clientset.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) ctrl := &Controller{ + name: "TemplateController", templatesDir: templatesDir, client: mcfgClient, kubeClient: kubeClient, @@ -227,11 +230,11 @@ func (ctrl *Controller) deleteFeature(obj interface{}) { } // Run executes the template controller -func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) { +func (ctrl *Controller) Run(ctx context.Context, workers int) { defer utilruntime.HandleCrash() defer ctrl.queue.ShutDown() - if !cache.WaitForCacheSync(stopCh, ctrl.ccListerSynced, ctrl.mcListerSynced, ctrl.secretsInformerSynced, ctrl.featListerSynced) { + if !cache.WaitForCacheSync(ctx.Done(), ctrl.ccListerSynced, ctrl.mcListerSynced, ctrl.secretsInformerSynced, ctrl.featListerSynced) { return } @@ -239,10 +242,10 @@ func (ctrl *Controller) Run(workers int, stopCh <-chan struct{}) { defer glog.Info("Shutting down MachineConfigController-TemplateController") for i := 0; i < workers; i++ { - go wait.Until(ctrl.worker, time.Second, stopCh) + go wait.UntilWithContext(ctx, ctrl.worker, time.Second) } - <-stopCh + <-ctx.Done() } func (ctrl *Controller) addControllerConfig(obj interface{}) { @@ -393,19 +396,19 @@ func (ctrl *Controller) enqueueAfter(controllerconfig *mcfgv1.ControllerConfig, // worker runs a worker thread that just 
dequeues items, processes them, and marks them done. // It enforces that the syncHandler is never invoked concurrently with the same key. -func (ctrl *Controller) worker() { - for ctrl.processNextWorkItem() { +func (ctrl *Controller) worker(ctx context.Context) { + for ctrl.processNextWorkItem(ctx) { } } -func (ctrl *Controller) processNextWorkItem() bool { +func (ctrl *Controller) processNextWorkItem(ctx context.Context) bool { key, quit := ctrl.queue.Get() if quit { return false } defer ctrl.queue.Done(key) - err := ctrl.syncHandler(key.(string)) + err := ctrl.syncHandler(ctx, key.(string)) ctrl.handleErr(err, key) return true @@ -431,7 +434,7 @@ func (ctrl *Controller) handleErr(err error, key interface{}) { // syncControllerConfig will sync the controller config with the given key. // This function is not meant to be invoked concurrently with the same key. -func (ctrl *Controller) syncControllerConfig(key string) error { +func (ctrl *Controller) syncControllerConfig(ctx context.Context, key string) error { startTime := time.Now() glog.V(4).Infof("Started syncing controllerconfig %q (%v)", key, startTime) defer func() { @@ -456,20 +459,20 @@ func (ctrl *Controller) syncControllerConfig(key string) error { cfg := controllerconfig.DeepCopy() if cfg.GetGeneration() != cfg.Status.ObservedGeneration { - if err := ctrl.syncRunningStatus(cfg); err != nil { + if err := ctrl.syncRunningStatus(ctx, cfg); err != nil { return err } } var pullSecretRaw []byte if cfg.Spec.PullSecret != nil { - secret, err := ctrl.kubeClient.CoreV1().Secrets(cfg.Spec.PullSecret.Namespace).Get(context.TODO(), cfg.Spec.PullSecret.Name, metav1.GetOptions{}) + secret, err := ctrl.kubeClient.CoreV1().Secrets(cfg.Spec.PullSecret.Namespace).Get(ctx, cfg.Spec.PullSecret.Name, metav1.GetOptions{}) if err != nil { - return ctrl.syncFailingStatus(cfg, err) + return ctrl.syncFailingStatus(ctx, cfg, err) } if secret.Type != corev1.SecretTypeDockerConfigJson { - return ctrl.syncFailingStatus(cfg, 
fmt.Errorf("expected secret type %s found %s", corev1.SecretTypeDockerConfigJson, secret.Type)) + return ctrl.syncFailingStatus(ctx, cfg, fmt.Errorf("expected secret type %s found %s", corev1.SecretTypeDockerConfigJson, secret.Type)) } pullSecretRaw = secret.Data[corev1.DockerConfigJsonKey] } @@ -478,24 +481,24 @@ func (ctrl *Controller) syncControllerConfig(key string) error { if err != nil && !errors.IsNotFound(err) { err := fmt.Errorf("could not fetch FeatureGate: %w", err) glog.V(2).Infof("%v", err) - return ctrl.syncFailingStatus(cfg, err) + return ctrl.syncFailingStatus(ctx, cfg, err) } mcs, err := getMachineConfigsForControllerConfig(ctrl.templatesDir, cfg, pullSecretRaw, fg) if err != nil { - return ctrl.syncFailingStatus(cfg, err) + return ctrl.syncFailingStatus(ctx, cfg, err) } for _, mc := range mcs { - _, updated, err := mcoResourceApply.ApplyMachineConfig(ctrl.client.MachineconfigurationV1(), mc) + _, updated, err := mcoResourceApply.ApplyMachineConfig(ctx, ctrl.client.MachineconfigurationV1(), mc) if err != nil { - return ctrl.syncFailingStatus(cfg, err) + return ctrl.syncFailingStatus(ctx, cfg, err) } if updated { glog.V(4).Infof("Machineconfig %s was updated", mc.Name) } } - return ctrl.syncCompletedStatus(cfg) + return ctrl.syncCompletedStatus(ctx, cfg) } func getMachineConfigsForControllerConfig(templatesDir string, config *mcfgv1.ControllerConfig, pullSecretRaw []byte, featureGate *configv1.FeatureGate) ([]*mcfgv1.MachineConfig, error) { @@ -526,3 +529,8 @@ func getMachineConfigsForControllerConfig(templatesDir string, config *mcfgv1.Co func RunBootstrap(templatesDir string, config *mcfgv1.ControllerConfig, pullSecretRaw []byte, featureGate *configv1.FeatureGate) ([]*mcfgv1.MachineConfig, error) { return getMachineConfigsForControllerConfig(templatesDir, config, pullSecretRaw, featureGate) } + +// Name() returns the name of this controller +func (ctrl *Controller) Name() string { + return ctrl.name +} diff --git 
a/pkg/controller/template/template_controller_test.go b/pkg/controller/template/template_controller_test.go index 88e9ad1cf2..f32a07646c 100644 --- a/pkg/controller/template/template_controller_test.go +++ b/pkg/controller/template/template_controller_test.go @@ -1,6 +1,7 @@ package template import ( + "context" "fmt" "reflect" "testing" @@ -147,7 +148,7 @@ func (f *fixture) runExpectError(ccname string) { func (f *fixture) runController(ccname string, expectError bool) { c := f.newController() - err := c.syncHandler(ccname) + err := c.syncHandler(context.TODO(), ccname) if !expectError && err != nil { f.t.Errorf("error syncing controllerconfig: %v", err) } else if expectError && err == nil { From f697d4496b00ffbd8ecfe1b8f5481de052941b0b Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:27:52 -0500 Subject: [PATCH 14/18] Context cancellation for operator This adds context cancellation to the functions in the operator. This favors explicitly passing the context as the first parameter so it is apparent that the function is cancellable. 
--- pkg/operator/operator.go | 30 +++++------ pkg/operator/status.go | 72 ++++++++++++------------- pkg/operator/status_test.go | 58 ++++++++++---------- pkg/operator/sync.go | 103 ++++++++++++++++++------------------ 4 files changed, 132 insertions(+), 131 deletions(-) diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 6a5743ff9f..6f4c233c09 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -70,7 +70,7 @@ type Operator struct { eventRecorder record.EventRecorder libgoRecorder events.Recorder - syncHandler func(ic string) error + syncHandler func(ctx context.Context, ic string) error crdLister apiextlistersv1.CustomResourceDefinitionLister mcpLister mcfglistersv1.MachineConfigPoolLister @@ -109,7 +109,7 @@ type Operator struct { // queue only ever has one item, but it has nice error handling backoff/retry semantics queue workqueue.RateLimitingInterface - stopCh <-chan struct{} + ctx context.Context renderConfig *renderConfig } @@ -226,12 +226,12 @@ func New( } // Run runs the machine config operator. 
-func (optr *Operator) Run(workers int, stopCh <-chan struct{}) { +func (optr *Operator) Run(ctx context.Context, workers int) { defer utilruntime.HandleCrash() defer optr.queue.ShutDown() apiClient := optr.apiExtClient.ApiextensionsV1() - _, err := apiClient.CustomResourceDefinitions().Get(context.TODO(), "controllerconfigs.machineconfiguration.openshift.io", metav1.GetOptions{}) + _, err := apiClient.CustomResourceDefinitions().Get(ctx, "controllerconfigs.machineconfiguration.openshift.io", metav1.GetOptions{}) if err != nil { if apierrors.IsNotFound(err) { glog.Infof("Couldn't find controllerconfig CRD, in cluster bringup mode") @@ -241,7 +241,7 @@ func (optr *Operator) Run(workers int, stopCh <-chan struct{}) { } } - if !cache.WaitForCacheSync(stopCh, + if !cache.WaitForCacheSync(ctx.Done(), optr.crdListerSynced, optr.deployListerSynced, optr.daemonsetListerSynced, @@ -265,7 +265,7 @@ func (optr *Operator) Run(workers int, stopCh <-chan struct{}) { // these can only be synced after CRDs are installed if !optr.inClusterBringup { - if !cache.WaitForCacheSync(stopCh, + if !cache.WaitForCacheSync(ctx.Done(), optr.ccListerSynced, ) { glog.Error("failed to sync caches") @@ -276,13 +276,13 @@ func (optr *Operator) Run(workers int, stopCh <-chan struct{}) { glog.Info("Starting MachineConfigOperator") defer glog.Info("Shutting down MachineConfigOperator") - optr.stopCh = stopCh + optr.ctx = ctx for i := 0; i < workers; i++ { - go wait.Until(optr.worker, time.Second, stopCh) + go wait.UntilWithContext(ctx, optr.worker, time.Second) } - <-stopCh + <-ctx.Done() } func (optr *Operator) enqueue(obj interface{}) { @@ -309,19 +309,19 @@ func (optr *Operator) eventHandler() cache.ResourceEventHandler { } } -func (optr *Operator) worker() { - for optr.processNextWorkItem() { +func (optr *Operator) worker(ctx context.Context) { + for optr.processNextWorkItem(ctx) { } } -func (optr *Operator) processNextWorkItem() bool { +func (optr *Operator) processNextWorkItem(ctx 
context.Context) bool { key, quit := optr.queue.Get() if quit { return false } defer optr.queue.Done(key) - err := optr.syncHandler(key.(string)) + err := optr.syncHandler(ctx, key.(string)) optr.handleErr(err, key) return true @@ -345,7 +345,7 @@ func (optr *Operator) handleErr(err error, key interface{}) { optr.queue.AddAfter(key, 1*time.Minute) } -func (optr *Operator) sync(key string) error { +func (optr *Operator) sync(ctx context.Context, key string) error { startTime := time.Now() glog.V(4).Infof("Started syncing operator %q (%v)", key, startTime) defer func() { @@ -365,5 +365,5 @@ func (optr *Operator) sync(key string) error { // this check must always run last since it makes sure the pools are in sync/upgrading correctly {"RequiredPools", optr.syncRequiredMachineConfigPools}, } - return optr.syncAll(syncFuncs) + return optr.syncAll(ctx, syncFuncs) } diff --git a/pkg/operator/status.go b/pkg/operator/status.go index ca19af506f..3b622c3fd6 100644 --- a/pkg/operator/status.go +++ b/pkg/operator/status.go @@ -23,8 +23,8 @@ import ( ) // syncVersion handles reporting the version to the clusteroperator -func (optr *Operator) syncVersion() error { - co, err := optr.fetchClusterOperator() +func (optr *Operator) syncVersion(ctx context.Context) error { + co, err := optr.fetchClusterOperator(ctx) if err != nil { return err } @@ -50,13 +50,13 @@ func (optr *Operator) syncVersion() error { co.Status.Versions = optr.vStore.GetAll() // TODO(runcom): abstract below with updateStatus optr.setOperatorStatusExtension(&co.Status, nil) - _, err = optr.configClient.ConfigV1().ClusterOperators().UpdateStatus(context.TODO(), co, metav1.UpdateOptions{}) + _, err = optr.configClient.ConfigV1().ClusterOperators().UpdateStatus(ctx, co, metav1.UpdateOptions{}) return err } // syncRelatedObjects handles reporting the relatedObjects to the clusteroperator -func (optr *Operator) syncRelatedObjects() error { - co, err := optr.fetchClusterOperator() +func (optr *Operator) 
syncRelatedObjects(ctx context.Context) error { + co, err := optr.fetchClusterOperator(ctx) if err != nil { return err } @@ -84,7 +84,7 @@ func (optr *Operator) syncRelatedObjects() error { } if !equality.Semantic.DeepEqual(coCopy.Status.RelatedObjects, co.Status.RelatedObjects) { - _, err := optr.configClient.ConfigV1().ClusterOperators().UpdateStatus(context.TODO(), co, metav1.UpdateOptions{}) + _, err := optr.configClient.ConfigV1().ClusterOperators().UpdateStatus(ctx, co, metav1.UpdateOptions{}) return err } @@ -92,8 +92,8 @@ func (optr *Operator) syncRelatedObjects() error { } // syncAvailableStatus applies the new condition to the mco's ClusterOperator object. -func (optr *Operator) syncAvailableStatus(ierr syncError) error { - co, err := optr.fetchClusterOperator() +func (optr *Operator) syncAvailableStatus(ctx context.Context, ierr syncError) error { + co, err := optr.fetchClusterOperator(ctx) if err != nil { return err } @@ -126,12 +126,12 @@ func (optr *Operator) syncAvailableStatus(ierr syncError) error { Message: message, } - return optr.updateStatus(co, coStatus) + return optr.updateStatus(ctx, co, coStatus) } // syncProgressingStatus applies the new condition to the mco's ClusterOperator object. 
-func (optr *Operator) syncProgressingStatus() error { - co, err := optr.fetchClusterOperator() +func (optr *Operator) syncProgressingStatus(ctx context.Context) error { + co, err := optr.fetchClusterOperator(ctx) if err != nil { return err } @@ -170,13 +170,13 @@ func (optr *Operator) syncProgressingStatus() error { coStatus.Status = configv1.ConditionTrue } - return optr.updateStatus(co, coStatus) + return optr.updateStatus(ctx, co, coStatus) } -func (optr *Operator) updateStatus(co *configv1.ClusterOperator, status configv1.ClusterOperatorStatusCondition) error { +func (optr *Operator) updateStatus(ctx context.Context, co *configv1.ClusterOperator, status configv1.ClusterOperatorStatusCondition) error { cov1helpers.SetStatusCondition(&co.Status.Conditions, status) optr.setOperatorStatusExtension(&co.Status, nil) - _, err := optr.configClient.ConfigV1().ClusterOperators().UpdateStatus(context.TODO(), co, metav1.UpdateOptions{}) + _, err := optr.configClient.ConfigV1().ClusterOperators().UpdateStatus(ctx, co, metav1.UpdateOptions{}) return err } @@ -184,8 +184,8 @@ const ( asExpectedReason = "AsExpected" ) -func (optr *Operator) clearDegradedStatus(task string) error { - co, err := optr.fetchClusterOperator() +func (optr *Operator) clearDegradedStatus(ctx context.Context, task string) error { + co, err := optr.fetchClusterOperator(ctx) if err != nil { return err } @@ -202,12 +202,12 @@ func (optr *Operator) clearDegradedStatus(task string) error { if degradedStatusCondition.Reason != task+"Failed" { return nil } - return optr.syncDegradedStatus(syncError{}) + return optr.syncDegradedStatus(ctx, syncError{}) } // syncDegradedStatus applies the new condition to the mco's ClusterOperator object. 
-func (optr *Operator) syncDegradedStatus(ierr syncError) (err error) { - co, err := optr.fetchClusterOperator() +func (optr *Operator) syncDegradedStatus(ctx context.Context, ierr syncError) (err error) { + co, err := optr.fetchClusterOperator(ctx) if err != nil { return err } @@ -260,7 +260,7 @@ func (optr *Operator) syncDegradedStatus(ierr syncError) (err error) { Reason: reason, } - return optr.updateStatus(co, coStatus) + return optr.updateStatus(ctx, co, coStatus) } const ( @@ -271,8 +271,8 @@ const ( ) // syncUpgradeableStatus applies the new condition to the mco's ClusterOperator object. -func (optr *Operator) syncUpgradeableStatus() error { - co, err := optr.fetchClusterOperator() +func (optr *Operator) syncUpgradeableStatus(ctx context.Context) error { + co, err := optr.fetchClusterOperator(ctx) if err != nil { return err } @@ -318,18 +318,18 @@ func (optr *Operator) syncUpgradeableStatus() error { // don't overwrite status if updating or degraded if !updating && !degraded { - skewStatus, status, err := optr.isKubeletSkewSupported(pools) + skewStatus, status, err := optr.isKubeletSkewSupported(ctx, pools) if err != nil { glog.Errorf("Error checking version skew: %v, kubelet skew status: %v, status reason: %v, status message: %v", err, skewStatus, status.Reason, status.Message) coStatus.Reason = status.Reason coStatus.Message = status.Message - return optr.updateStatus(co, coStatus) + return optr.updateStatus(ctx, co, coStatus) } switch skewStatus { case skewUnchecked: coStatus.Reason = status.Reason coStatus.Message = status.Message - return optr.updateStatus(co, coStatus) + return optr.updateStatus(ctx, co, coStatus) case skewUnsupported: coStatus.Reason = status.Reason coStatus.Message = status.Message @@ -341,22 +341,22 @@ func (optr *Operator) syncUpgradeableStatus() error { } glog.Infof("kubelet skew status: %v, status reason: %v", skewStatus, status.Reason) optr.eventRecorder.Eventf(mcoObjectRef, corev1.EventTypeWarning, coStatus.Reason, 
coStatus.Message) - return optr.updateStatus(co, coStatus) + return optr.updateStatus(ctx, co, coStatus) case skewPresent: coStatus.Reason = status.Reason coStatus.Message = status.Message glog.Infof("kubelet skew status: %v, status reason: %v", skewStatus, status.Reason) - return optr.updateStatus(co, coStatus) + return optr.updateStatus(ctx, co, coStatus) } } - return optr.updateStatus(co, coStatus) + return optr.updateStatus(ctx, co, coStatus) } // isKubeletSkewSupported checks the version skew of kube-apiserver and node kubelet version. // Returns the skew status. version skew > 2 is not supported. -func (optr *Operator) isKubeletSkewSupported(pools []*v1.MachineConfigPool) (skewStatus string, coStatus configv1.ClusterOperatorStatusCondition, err error) { +func (optr *Operator) isKubeletSkewSupported(ctx context.Context, pools []*v1.MachineConfigPool) (skewStatus string, coStatus configv1.ClusterOperatorStatusCondition, err error) { coStatus = configv1.ClusterOperatorStatusCondition{} - kubeAPIServerStatus, err := optr.configClient.ConfigV1().ClusterOperators().Get(context.TODO(), "kube-apiserver", metav1.GetOptions{}) + kubeAPIServerStatus, err := optr.configClient.ConfigV1().ClusterOperators().Get(ctx, "kube-apiserver", metav1.GetOptions{}) if err != nil { coStatus.Reason = skewUnchecked coStatus.Message = fmt.Sprintf("An error occurred when checking kubelet version skew: %v", err) @@ -478,13 +478,13 @@ func getMinimalSkewSupportNodeVersion(version string) string { return version } -func (optr *Operator) fetchClusterOperator() (*configv1.ClusterOperator, error) { - co, err := optr.configClient.ConfigV1().ClusterOperators().Get(context.TODO(), optr.name, metav1.GetOptions{}) +func (optr *Operator) fetchClusterOperator(ctx context.Context) (*configv1.ClusterOperator, error) { + co, err := optr.configClient.ConfigV1().ClusterOperators().Get(ctx, optr.name, metav1.GetOptions{}) if meta.IsNoMatchError(err) { return nil, nil } if apierrors.IsNotFound(err) { - 
return optr.initializeClusterOperator() + return optr.initializeClusterOperator(ctx) } if err != nil { return nil, err @@ -493,8 +493,8 @@ func (optr *Operator) fetchClusterOperator() (*configv1.ClusterOperator, error) return coCopy, nil } -func (optr *Operator) initializeClusterOperator() (*configv1.ClusterOperator, error) { - co, err := optr.configClient.ConfigV1().ClusterOperators().Create(context.TODO(), &configv1.ClusterOperator{ +func (optr *Operator) initializeClusterOperator(ctx context.Context) (*configv1.ClusterOperator, error) { + co, err := optr.configClient.ConfigV1().ClusterOperators().Create(ctx, &configv1.ClusterOperator{ ObjectMeta: metav1.ObjectMeta{ Name: optr.name, }, @@ -523,7 +523,7 @@ func (optr *Operator) initializeClusterOperator() (*configv1.ClusterOperator, er // For both normal runs and upgrades, this code isn't hit and we get the right version every // time. This also only contains the operator RELEASE_VERSION when we're here. co.Status.Versions = optr.vStore.GetAll() - return optr.configClient.ConfigV1().ClusterOperators().UpdateStatus(context.TODO(), co, metav1.UpdateOptions{}) + return optr.configClient.ConfigV1().ClusterOperators().UpdateStatus(ctx, co, metav1.UpdateOptions{}) } // setOperatorStatusExtension sets the raw extension field of the clusteroperator. 
Today, we set diff --git a/pkg/operator/status_test.go b/pkg/operator/status_test.go index 75aebcc261..069d668b59 100644 --- a/pkg/operator/status_test.go +++ b/pkg/operator/status_test.go @@ -241,7 +241,7 @@ func TestOperatorSyncStatus(t *testing.T) { syncFuncs: []syncFunc{ { name: "fn1", - fn: func(config *renderConfig) error { return errors.New("got err") }, + fn: func(ctx context.Context, config *renderConfig) error { return errors.New("got err") }, }, }, expectOperatorFail: true, @@ -270,7 +270,7 @@ func TestOperatorSyncStatus(t *testing.T) { syncFuncs: []syncFunc{ { name: "fn1", - fn: func(config *renderConfig) error { return nil }, + fn: func(ctx context.Context, config *renderConfig) error { return nil }, }, }, }, @@ -303,7 +303,7 @@ func TestOperatorSyncStatus(t *testing.T) { syncFuncs: []syncFunc{ { name: "fn1", - fn: func(config *renderConfig) error { return nil }, + fn: func(ctx context.Context, config *renderConfig) error { return nil }, }, }, }, @@ -330,7 +330,7 @@ func TestOperatorSyncStatus(t *testing.T) { syncFuncs: []syncFunc{ { name: "fn1", - fn: func(config *renderConfig) error { return nil }, + fn: func(ctx context.Context, config *renderConfig) error { return nil }, }, }, }, @@ -363,7 +363,7 @@ func TestOperatorSyncStatus(t *testing.T) { syncFuncs: []syncFunc{ { name: "fn1", - fn: func(config *renderConfig) error { return nil }, + fn: func(ctx context.Context, config *renderConfig) error { return nil }, }, }, }, @@ -396,7 +396,7 @@ func TestOperatorSyncStatus(t *testing.T) { syncFuncs: []syncFunc{ { name: "fn1", - fn: func(config *renderConfig) error { return nil }, + fn: func(ctx context.Context, config *renderConfig) error { return nil }, }, }, }, @@ -426,7 +426,7 @@ func TestOperatorSyncStatus(t *testing.T) { syncFuncs: []syncFunc{ { name: "fn1", - fn: func(config *renderConfig) error { return errors.New("mock error") }, + fn: func(ctx context.Context, config *renderConfig) error { return errors.New("mock error") }, }, }, }, @@ -461,7 
+461,7 @@ func TestOperatorSyncStatus(t *testing.T) { syncFuncs: []syncFunc{ { name: "fn1", - fn: func(config *renderConfig) error { return errors.New("error") }, + fn: func(ctx context.Context, config *renderConfig) error { return errors.New("error") }, }, }, }, @@ -492,7 +492,7 @@ func TestOperatorSyncStatus(t *testing.T) { syncFuncs: []syncFunc{ { name: "fn1", - fn: func(config *renderConfig) error { return errors.New("error") }, + fn: func(ctx context.Context, config *renderConfig) error { return errors.New("error") }, }, }, }, @@ -525,7 +525,7 @@ func TestOperatorSyncStatus(t *testing.T) { syncFuncs: []syncFunc{ { name: "fn1", - fn: func(config *renderConfig) error { return nil }, + fn: func(ctx context.Context, config *renderConfig) error { return nil }, }, }, }, @@ -554,7 +554,7 @@ func TestOperatorSyncStatus(t *testing.T) { syncFuncs: []syncFunc{ { name: "fn1", - fn: func(config *renderConfig) error { return errors.New("error") }, + fn: func(ctx context.Context, config *renderConfig) error { return errors.New("error") }, }, }, }, @@ -581,7 +581,7 @@ func TestOperatorSyncStatus(t *testing.T) { syncFuncs: []syncFunc{ { name: "fn1", - fn: func(config *renderConfig) error { return nil }, + fn: func(ctx context.Context, config *renderConfig) error { return nil }, }, }, }, @@ -635,7 +635,7 @@ func TestOperatorSyncStatus(t *testing.T) { optr.vStore.Set("operator", "test-version") } optr.configClient = fakeconfigclientset.NewSimpleClientset(co, kasOperator) - err := optr.syncAll(sync.syncFuncs) + err := optr.syncAll(context.TODO(), sync.syncFuncs) if sync.expectOperatorFail { assert.NotNil(t, err, "test case %d, sync call %d, expected an error", idx, j) } else { @@ -695,18 +695,18 @@ func TestInClusterBringUpStayOnErr(t *testing.T) { optr.configClient = fakeconfigclientset.NewSimpleClientset(co, kasOperator) optr.inClusterBringup = true - fn1 := func(config *renderConfig) error { + fn1 := func(ctx context.Context, config *renderConfig) error { return 
errors.New("mocked fn1") } - err := optr.syncAll([]syncFunc{{name: "mock1", fn: fn1}}) + err := optr.syncAll(context.TODO(), []syncFunc{{name: "mock1", fn: fn1}}) assert.NotNil(t, err, "expected syncAll to fail") assert.True(t, optr.inClusterBringup) - fn1 = func(config *renderConfig) error { + fn1 = func(ctx context.Context, config *renderConfig) error { return nil } - err = optr.syncAll([]syncFunc{{name: "mock1", fn: fn1}}) + err = optr.syncAll(context.TODO(), []syncFunc{{name: "mock1", fn: fn1}}) assert.Nil(t, err, "expected syncAll to pass") assert.False(t, optr.inClusterBringup) @@ -751,18 +751,18 @@ func TestKubeletSkewUnSupported(t *testing.T) { optr.configClient = fakeClient optr.inClusterBringup = true - fn1 := func(config *renderConfig) error { + fn1 := func(ctx context.Context, config *renderConfig) error { return errors.New("mocked fn1") } - err := optr.syncAll([]syncFunc{{name: "mock1", fn: fn1}}) + err := optr.syncAll(context.TODO(), []syncFunc{{name: "mock1", fn: fn1}}) assert.NotNil(t, err, "expected syncAll to fail") assert.True(t, optr.inClusterBringup) - fn1 = func(config *renderConfig) error { + fn1 = func(ctx context.Context, config *renderConfig) error { return nil } - err = optr.syncAll([]syncFunc{{name: "mock1", fn: fn1}}) + err = optr.syncAll(context.TODO(), []syncFunc{{name: "mock1", fn: fn1}}) assert.Nil(t, err, "expected syncAll to pass") assert.False(t, optr.inClusterBringup) @@ -839,18 +839,18 @@ func TestCustomPoolKubeletSkewUnSupported(t *testing.T) { optr.configClient = fakeClient optr.inClusterBringup = true - fn1 := func(config *renderConfig) error { + fn1 := func(ctx context.Context, config *renderConfig) error { return errors.New("mocked fn1") } - err := optr.syncAll([]syncFunc{{name: "mock1", fn: fn1}}) + err := optr.syncAll(context.TODO(), []syncFunc{{name: "mock1", fn: fn1}}) assert.NotNil(t, err, "expected syncAll to fail") assert.True(t, optr.inClusterBringup) - fn1 = func(config *renderConfig) error { + fn1 = func(ctx 
context.Context, config *renderConfig) error { return nil } - err = optr.syncAll([]syncFunc{{name: "mock1", fn: fn1}}) + err = optr.syncAll(context.TODO(), []syncFunc{{name: "mock1", fn: fn1}}) assert.Nil(t, err, "expected syncAll to pass") assert.False(t, optr.inClusterBringup) @@ -925,18 +925,18 @@ func TestKubeletSkewSupported(t *testing.T) { optr.configClient = fakeClient optr.inClusterBringup = true - fn1 := func(config *renderConfig) error { + fn1 := func(ctx context.Context, config *renderConfig) error { return errors.New("mocked fn1") } - err := optr.syncAll([]syncFunc{{name: "mock1", fn: fn1}}) + err := optr.syncAll(context.TODO(), []syncFunc{{name: "mock1", fn: fn1}}) assert.NotNil(t, err, "expected syncAll to fail") assert.True(t, optr.inClusterBringup) - fn1 = func(config *renderConfig) error { + fn1 = func(ctx context.Context, config *renderConfig) error { return nil } - err = optr.syncAll([]syncFunc{{name: "mock1", fn: fn1}}) + err = optr.syncAll(context.TODO(), []syncFunc{{name: "mock1", fn: fn1}}) assert.Nil(t, err, "expected syncAll to pass") assert.False(t, optr.inClusterBringup) diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 488819f8de..cebde04517 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -94,7 +94,7 @@ const ( type syncFunc struct { name string - fn func(config *renderConfig) error + fn func(ctx context.Context, config *renderConfig) error } type syncError struct { @@ -102,8 +102,8 @@ type syncError struct { err error } -func (optr *Operator) syncAll(syncFuncs []syncFunc) error { - if err := optr.syncProgressingStatus(); err != nil { +func (optr *Operator) syncAll(ctx context.Context, syncFuncs []syncFunc) error { + if err := optr.syncProgressingStatus(ctx); err != nil { return fmt.Errorf("error syncing progressing status: %w", err) } @@ -112,7 +112,7 @@ func (optr *Operator) syncAll(syncFuncs []syncFunc) error { startTime := time.Now() syncErr = syncError{ task: sf.name, - err: sf.fn(optr.renderConfig), + 
err: sf.fn(ctx, optr.renderConfig), } if optr.inClusterBringup { glog.Infof("[init mode] synced %s in %v", sf.name, time.Since(startTime)) @@ -120,28 +120,28 @@ func (optr *Operator) syncAll(syncFuncs []syncFunc) error { if syncErr.err != nil { break } - if err := optr.clearDegradedStatus(sf.name); err != nil { + if err := optr.clearDegradedStatus(ctx, sf.name); err != nil { return fmt.Errorf("error clearing degraded status: %w", err) } } - if err := optr.syncDegradedStatus(syncErr); err != nil { + if err := optr.syncDegradedStatus(ctx, syncErr); err != nil { return fmt.Errorf("error syncing degraded status: %w", err) } - if err := optr.syncAvailableStatus(syncErr); err != nil { + if err := optr.syncAvailableStatus(ctx, syncErr); err != nil { return fmt.Errorf("error syncing available status: %w", err) } - if err := optr.syncUpgradeableStatus(); err != nil { + if err := optr.syncUpgradeableStatus(ctx); err != nil { return fmt.Errorf("error syncing upgradeble status: %w", err) } - if err := optr.syncVersion(); err != nil { + if err := optr.syncVersion(ctx); err != nil { return fmt.Errorf("error syncing version: %w", err) } - if err := optr.syncRelatedObjects(); err != nil { + if err := optr.syncRelatedObjects(ctx); err != nil { return fmt.Errorf("error syncing relatedObjects: %w", err) } @@ -200,15 +200,15 @@ func (optr *Operator) syncCloudConfig(spec *mcfgv1.ControllerConfigSpec, infra * } //nolint:gocyclo -func (optr *Operator) syncRenderConfig(_ *renderConfig) error { - if err := optr.syncCustomResourceDefinitions(); err != nil { +func (optr *Operator) syncRenderConfig(ctx context.Context, _ *renderConfig) error { + if err := optr.syncCustomResourceDefinitions(ctx); err != nil { return err } if optr.inClusterBringup { glog.V(4).Info("Starting inClusterBringup informers cache sync") // sync now our own informers after having installed the CRDs - if !cache.WaitForCacheSync(optr.stopCh, optr.ccListerSynced) { + if !cache.WaitForCacheSync(ctx.Done(), 
optr.ccListerSynced) { return fmt.Errorf("failed to sync caches for informers") } glog.V(4).Info("Finished inClusterBringup informers cache sync") @@ -366,7 +366,7 @@ func getIgnitionHost(infraStatus *configv1.InfrastructureStatus) (string, error) return ignitionHost, nil } -func (optr *Operator) syncCustomResourceDefinitions() error { +func (optr *Operator) syncCustomResourceDefinitions(ctx context.Context) error { crds := []string{ "manifests/controllerconfig.crd.yaml", } @@ -377,12 +377,12 @@ func (optr *Operator) syncCustomResourceDefinitions() error { return fmt.Errorf("error getting asset %s: %w", crd, err) } c := resourceread.ReadCustomResourceDefinitionV1OrDie(crdBytes) - _, updated, err := resourceapply.ApplyCustomResourceDefinitionV1(context.TODO(), optr.apiExtClient.ApiextensionsV1(), optr.libgoRecorder, c) + _, updated, err := resourceapply.ApplyCustomResourceDefinitionV1(ctx, optr.apiExtClient.ApiextensionsV1(), optr.libgoRecorder, c) if err != nil { return err } if updated { - if err := optr.waitForCustomResourceDefinition(c); err != nil { + if err := optr.waitForCustomResourceDefinition(ctx, c); err != nil { return err } } @@ -391,7 +391,7 @@ func (optr *Operator) syncCustomResourceDefinitions() error { return nil } -func (optr *Operator) syncMachineConfigPools(config *renderConfig) error { +func (optr *Operator) syncMachineConfigPools(ctx context.Context, config *renderConfig) error { mcps := []string{ "manifests/master.machineconfigpool.yaml", "manifests/worker.machineconfigpool.yaml", @@ -403,7 +403,7 @@ func (optr *Operator) syncMachineConfigPools(config *renderConfig) error { return err } p := mcoResourceRead.ReadMachineConfigPoolV1OrDie(mcpBytes) - _, _, err = mcoResourceApply.ApplyMachineConfigPool(optr.client.MachineconfigurationV1(), p) + _, _, err = mcoResourceApply.ApplyMachineConfigPool(ctx, optr.client.MachineconfigurationV1(), p) if err != nil { return err } @@ -436,7 +436,7 @@ func (optr *Operator) syncMachineConfigPools(config 
*renderConfig) error { return err } p := resourceread.ReadSecretV1OrDie(userdataBytes) - _, _, err = resourceapply.ApplySecret(context.TODO(), optr.kubeClient.CoreV1(), optr.libgoRecorder, p) + _, _, err = resourceapply.ApplySecret(ctx, optr.kubeClient.CoreV1(), optr.libgoRecorder, p) if err != nil { return err } @@ -445,14 +445,14 @@ func (optr *Operator) syncMachineConfigPools(config *renderConfig) error { return nil } -func (optr *Operator) applyManifests(config *renderConfig, paths manifestPaths) error { +func (optr *Operator) applyManifests(ctx context.Context, config *renderConfig, paths manifestPaths) error { for _, path := range paths.clusterRoles { crBytes, err := renderAsset(config, path) if err != nil { return err } cr := resourceread.ReadClusterRoleV1OrDie(crBytes) - _, _, err = resourceapply.ApplyClusterRole(context.TODO(), optr.kubeClient.RbacV1(), optr.libgoRecorder, cr) + _, _, err = resourceapply.ApplyClusterRole(ctx, optr.kubeClient.RbacV1(), optr.libgoRecorder, cr) if err != nil { return err } @@ -464,7 +464,7 @@ func (optr *Operator) applyManifests(config *renderConfig, paths manifestPaths) return err } rb := resourceread.ReadRoleBindingV1OrDie(rbBytes) - _, _, err = resourceapply.ApplyRoleBinding(context.TODO(), optr.kubeClient.RbacV1(), optr.libgoRecorder, rb) + _, _, err = resourceapply.ApplyRoleBinding(ctx, optr.kubeClient.RbacV1(), optr.libgoRecorder, rb) if err != nil { return err } @@ -476,7 +476,7 @@ func (optr *Operator) applyManifests(config *renderConfig, paths manifestPaths) return err } crb := resourceread.ReadClusterRoleBindingV1OrDie(crbBytes) - _, _, err = resourceapply.ApplyClusterRoleBinding(context.TODO(), optr.kubeClient.RbacV1(), optr.libgoRecorder, crb) + _, _, err = resourceapply.ApplyClusterRoleBinding(ctx, optr.kubeClient.RbacV1(), optr.libgoRecorder, crb) if err != nil { return err } @@ -488,7 +488,7 @@ func (optr *Operator) applyManifests(config *renderConfig, paths manifestPaths) return err } sa := 
resourceread.ReadServiceAccountV1OrDie(saBytes) - _, _, err = resourceapply.ApplyServiceAccount(context.TODO(), optr.kubeClient.CoreV1(), optr.libgoRecorder, sa) + _, _, err = resourceapply.ApplyServiceAccount(ctx, optr.kubeClient.CoreV1(), optr.libgoRecorder, sa) if err != nil { return err } @@ -500,7 +500,7 @@ func (optr *Operator) applyManifests(config *renderConfig, paths manifestPaths) return err } s := resourceread.ReadSecretV1OrDie(sBytes) - _, _, err = resourceapply.ApplySecret(context.TODO(), optr.kubeClient.CoreV1(), optr.libgoRecorder, s) + _, _, err = resourceapply.ApplySecret(ctx, optr.kubeClient.CoreV1(), optr.libgoRecorder, s) if err != nil { return err } @@ -512,19 +512,19 @@ func (optr *Operator) applyManifests(config *renderConfig, paths manifestPaths) return err } d := resourceread.ReadDaemonSetV1OrDie(dBytes) - _, updated, err := mcoResourceApply.ApplyDaemonSet(optr.kubeClient.AppsV1(), d) + _, updated, err := mcoResourceApply.ApplyDaemonSet(ctx, optr.kubeClient.AppsV1(), d) if err != nil { return err } if updated { - return optr.waitForDaemonsetRollout(d) + return optr.waitForDaemonsetRollout(ctx, d) } } return nil } -func (optr *Operator) syncMachineConfigController(config *renderConfig) error { +func (optr *Operator) syncMachineConfigController(ctx context.Context, config *renderConfig) error { paths := manifestPaths{ clusterRoles: []string{ mccClusterRoleManifestPath, @@ -541,7 +541,7 @@ func (optr *Operator) syncMachineConfigController(config *renderConfig) error { mccServiceAccountManifestPath, }, } - if err := optr.applyManifests(config, paths); err != nil { + if err := optr.applyManifests(ctx, config, paths); err != nil { return fmt.Errorf("failed to apply machine config controller manifests: %w", err) } @@ -551,12 +551,12 @@ func (optr *Operator) syncMachineConfigController(config *renderConfig) error { } mcc := resourceread.ReadDeploymentV1OrDie(mccBytes) - _, updated, err := mcoResourceApply.ApplyDeployment(optr.kubeClient.AppsV1(), 
mcc) + _, updated, err := mcoResourceApply.ApplyDeployment(ctx, optr.kubeClient.AppsV1(), mcc) if err != nil { return err } if updated { - if err := optr.waitForDeploymentRollout(mcc); err != nil { + if err := optr.waitForDeploymentRollout(ctx, mcc); err != nil { return err } } @@ -576,14 +576,14 @@ func (optr *Operator) syncMachineConfigController(config *renderConfig) error { optrVersion, _ := optr.vStore.Get("operator") cc.Annotations[ctrlcommon.ReleaseImageVersionAnnotationKey] = optrVersion - _, _, err = mcoResourceApply.ApplyControllerConfig(optr.client.MachineconfigurationV1(), cc) + _, _, err = mcoResourceApply.ApplyControllerConfig(ctx, optr.client.MachineconfigurationV1(), cc) if err != nil { return err } - return optr.waitForControllerConfigToBeCompleted(cc) + return optr.waitForControllerConfigToBeCompleted(ctx, cc) } -func (optr *Operator) syncMachineConfigDaemon(config *renderConfig) error { +func (optr *Operator) syncMachineConfigDaemon(ctx context.Context, config *renderConfig) error { paths := manifestPaths{ clusterRoles: []string{ mcdClusterRoleManifestPath, @@ -603,21 +603,21 @@ func (optr *Operator) syncMachineConfigDaemon(config *renderConfig) error { } // Only generate a new proxy cookie secret if the secret does not exist or if it has been deleted. 
- _, err := optr.kubeClient.CoreV1().Secrets(config.TargetNamespace).Get(context.TODO(), "cookie-secret", metav1.GetOptions{}) + _, err := optr.kubeClient.CoreV1().Secrets(config.TargetNamespace).Get(ctx, "cookie-secret", metav1.GetOptions{}) if apierrors.IsNotFound(err) { paths.secrets = []string{"manifests/machineconfigdaemon/cookie-secret.yaml"} } else if err != nil { return err } - if err := optr.applyManifests(config, paths); err != nil { + if err := optr.applyManifests(ctx, config, paths); err != nil { return fmt.Errorf("failed to apply machine config daemon manifests: %w", err) } return nil } -func (optr *Operator) syncMachineConfigServer(config *renderConfig) error { +func (optr *Operator) syncMachineConfigServer(ctx context.Context, config *renderConfig) error { paths := manifestPaths{ clusterRoles: []string{ mcsClusterRoleManifestPath, @@ -636,7 +636,7 @@ func (optr *Operator) syncMachineConfigServer(config *renderConfig) error { }, daemonset: mcsDaemonsetManifestPath, } - if err := optr.applyManifests(config, paths); err != nil { + if err := optr.applyManifests(ctx, config, paths); err != nil { return fmt.Errorf("failed to apply machine config server manifests: %w", err) } @@ -645,11 +645,11 @@ func (optr *Operator) syncMachineConfigServer(config *renderConfig) error { // syncRequiredMachineConfigPools ensures that all the nodes in machineconfigpools labeled with requiredForUpgradeMachineConfigPoolLabelKey // have updated to the latest configuration. 
-func (optr *Operator) syncRequiredMachineConfigPools(_ *renderConfig) error { +func (optr *Operator) syncRequiredMachineConfigPools(ctx context.Context, _ *renderConfig) error { var lastErr error if err := wait.Poll(time.Second, 10*time.Minute, func() (bool, error) { if lastErr != nil { - co, err := optr.fetchClusterOperator() + co, err := optr.fetchClusterOperator(ctx) if err != nil { errs := kubeErrs.NewAggregate([]error{err, lastErr}) lastErr = fmt.Errorf("failed to fetch clusteroperator: %w", errs) @@ -678,7 +678,7 @@ func (optr *Operator) syncRequiredMachineConfigPools(_ *renderConfig) error { if degraded { lastErr = fmt.Errorf("error pool %s is not ready, retrying. Status: (pool degraded: %v total: %d, ready %d, updated: %d, unavailable: %d)", pool.Name, degraded, pool.Status.MachineCount, pool.Status.ReadyMachineCount, pool.Status.UpdatedMachineCount, pool.Status.UnavailableMachineCount) glog.Errorf("Error syncing Required MachineConfigPools: %q", lastErr) - syncerr := optr.syncUpgradeableStatus() + syncerr := optr.syncUpgradeableStatus(ctx) if syncerr != nil { glog.Errorf("Error syncingUpgradeableStatus: %q", syncerr) } @@ -696,7 +696,7 @@ func (optr *Operator) syncRequiredMachineConfigPools(_ *renderConfig) error { releaseVersion, _ := optr.vStore.Get("operator") if err := isMachineConfigPoolConfigurationValid(pool, version.Hash, releaseVersion, opURL, optr.mcLister.Get); err != nil { lastErr = fmt.Errorf("pool %s has not progressed to latest configuration: %w, retrying", pool.Name, err) - syncerr := optr.syncUpgradeableStatus() + syncerr := optr.syncUpgradeableStatus(ctx) if syncerr != nil { glog.Errorf("Error syncingUpgradeableStatus: %q", syncerr) } @@ -708,7 +708,7 @@ func (optr *Operator) syncRequiredMachineConfigPools(_ *renderConfig) error { continue } lastErr = fmt.Errorf("error required pool %s is not ready, retrying. 
Status: (total: %d, ready %d, updated: %d, unavailable: %d, degraded: %d)", pool.Name, pool.Status.MachineCount, pool.Status.ReadyMachineCount, pool.Status.UpdatedMachineCount, pool.Status.UnavailableMachineCount, pool.Status.DegradedMachineCount) - syncerr := optr.syncUpgradeableStatus() + syncerr := optr.syncUpgradeableStatus(ctx) if syncerr != nil { glog.Errorf("Error syncingUpgradeableStatus: %q", syncerr) } @@ -745,9 +745,10 @@ const ( controllerConfigCompletedTimeout = 5 * time.Minute ) -func (optr *Operator) waitForCustomResourceDefinition(resource *apiextv1.CustomResourceDefinition) error { +func (optr *Operator) waitForCustomResourceDefinition(ctx context.Context, resource *apiextv1.CustomResourceDefinition) error { var lastErr error - if err := wait.Poll(customResourceReadyInterval, customResourceReadyTimeout, func() (bool, error) { + + if err := wait.PollWithContext(ctx, customResourceReadyInterval, customResourceReadyTimeout, func(context.Context) (bool, error) { crd, err := optr.crdLister.Get(resource.Name) if err != nil { lastErr = fmt.Errorf("error getting CustomResourceDefinition %s: %w", resource.Name, err) @@ -772,9 +773,9 @@ func (optr *Operator) waitForCustomResourceDefinition(resource *apiextv1.CustomR } //nolint:dupl -func (optr *Operator) waitForDeploymentRollout(resource *appsv1.Deployment) error { +func (optr *Operator) waitForDeploymentRollout(ctx context.Context, resource *appsv1.Deployment) error { var lastErr error - if err := wait.Poll(deploymentRolloutPollInterval, deploymentRolloutTimeout, func() (bool, error) { + if err := wait.PollWithContext(ctx, deploymentRolloutPollInterval, deploymentRolloutTimeout, func(context.Context) (bool, error) { d, err := optr.deployLister.Deployments(resource.Namespace).Get(resource.Name) if apierrors.IsNotFound(err) { // exit early to recreate the deployment. 
@@ -807,9 +808,9 @@ func (optr *Operator) waitForDeploymentRollout(resource *appsv1.Deployment) erro } //nolint:dupl -func (optr *Operator) waitForDaemonsetRollout(resource *appsv1.DaemonSet) error { +func (optr *Operator) waitForDaemonsetRollout(ctx context.Context, resource *appsv1.DaemonSet) error { var lastErr error - if err := wait.Poll(daemonsetRolloutPollInterval, daemonsetRolloutTimeout, func() (bool, error) { + if err := wait.PollWithContext(ctx, daemonsetRolloutPollInterval, daemonsetRolloutTimeout, func(context.Context) (bool, error) { d, err := optr.daemonsetLister.DaemonSets(resource.Namespace).Get(resource.Name) if apierrors.IsNotFound(err) { // exit early to recreate the daemonset. @@ -841,9 +842,9 @@ func (optr *Operator) waitForDaemonsetRollout(resource *appsv1.DaemonSet) error return nil } -func (optr *Operator) waitForControllerConfigToBeCompleted(resource *mcfgv1.ControllerConfig) error { +func (optr *Operator) waitForControllerConfigToBeCompleted(ctx context.Context, resource *mcfgv1.ControllerConfig) error { var lastErr error - if err := wait.Poll(controllerConfigCompletedInterval, controllerConfigCompletedTimeout, func() (bool, error) { + if err := wait.PollWithContext(ctx, controllerConfigCompletedInterval, controllerConfigCompletedTimeout, func(context.Context) (bool, error) { if err := mcfgv1.IsControllerConfigCompleted(resource.GetName(), optr.ccLister.Get); err != nil { lastErr = fmt.Errorf("controllerconfig is not completed: %w", err) return false, nil From 474648573191caf484c62189aa52abcd6e68ffb5 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:28:54 -0500 Subject: [PATCH 15/18] clusterNodeWriter use context in UpdateNodeRetry We changed the signature of UpdateNodeRetry over in internals because of what we're doing in controller/operator land, this just adjusts the call over here to take that into account. 
--- pkg/daemon/writer.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/daemon/writer.go b/pkg/daemon/writer.go index 400a8591cb..d92debb81d 100644 --- a/pkg/daemon/writer.go +++ b/pkg/daemon/writer.go @@ -1,6 +1,7 @@ package daemon import ( + "context" "fmt" "k8s.io/client-go/kubernetes/scheme" @@ -264,7 +265,7 @@ func (nw *clusterNodeWriter) Eventf(eventtype, reason, messageFmt string, args . } func implSetNodeAnnotations(client corev1client.NodeInterface, lister corev1lister.NodeLister, nodeName string, m map[string]string) response { - node, err := internal.UpdateNodeRetry(client, lister, nodeName, func(node *corev1.Node) { + node, err := internal.UpdateNodeRetry(context.TODO(), client, lister, nodeName, func(node *corev1.Node) { for k, v := range m { node.Annotations[k] = v } From 67cef3db84faabba3ae1b4283f6da12b847c1cd4 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:31:43 -0500 Subject: [PATCH 16/18] Update bootstrap test account for context changes A bunch of the method signatures changed as a result of the 'contextification' of the MCO's controllers and operator, this just updates the bootstrap test to match those new signatures (currently with context.TODO() ) --- test/e2e-bootstrap/bootstrap_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/e2e-bootstrap/bootstrap_test.go b/test/e2e-bootstrap/bootstrap_test.go index 3aa3c7dc3d..e08d32a57c 100644 --- a/test/e2e-bootstrap/bootstrap_test.go +++ b/test/e2e-bootstrap/bootstrap_test.go @@ -305,7 +305,7 @@ spec: // Run the bootstrap bootstrapper := bootstrap.New(templatesDir, srcDir, filepath.Join(bootstrapTestDataDir, "/machineconfigcontroller-pull-secret")) - err = bootstrapper.Run(destDir) + err = bootstrapper.Run(context.TODO(), destDir) require.NoError(t, err) // Compare the rendered configs @@ -364,7 +364,7 @@ func newTestFixture(t *testing.T, cfg *rest.Config, objs 
[]runtime.Object) *fixt close(ctrlctx.InformersStarted) for _, c := range controllers { - go c.Run(2, ctrlctx.Stop) + go c.Run(context.TODO(), 2) } return &fixture{ From 1ac1bce9106c9973a0473042a0117b0a567e26d3 Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:33:32 -0500 Subject: [PATCH 17/18] Gracefully shutdown controller and release lease This: - adds main/lease contexts to the controller - sets up a counter and channels to track goroutine completion - sets up a signal handler to catch when the controller is being terminated so we can cancel our contexts - gracefully shuts down the controller upon receipt of a SIGINT/SIGTERM The reason this does not use sync.WaitGroup instead is that sync.WaitGroup has no awareness of 'what' it's waiting for, just 'how many', so the channels are more useful. Cribbed off of what the CVO did here: https://github.com/openshift/cluster-version-operator/pull/424 --- cmd/machine-config-controller/start.go | 122 +++++++++++++++++++++---- 1 file changed, 103 insertions(+), 19 deletions(-) diff --git a/cmd/machine-config-controller/start.go b/cmd/machine-config-controller/start.go index d43e1dac09..70e1da2043 100644 --- a/cmd/machine-config-controller/start.go +++ b/cmd/machine-config-controller/start.go @@ -4,6 +4,8 @@ import ( "context" "flag" "fmt" + "os" + "time" "github.com/golang/glog" "github.com/openshift/machine-config-operator/cmd/common" @@ -18,6 +20,7 @@ import ( "github.com/openshift/machine-config-operator/pkg/version" "github.com/spf13/cobra" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/tools/leaderelection" ) @@ -44,10 +47,20 @@ func init() { startCmd.PersistentFlags().StringVar(&startOpts.promMetricsListenAddress, "metrics-listen-address", "127.0.0.1:8797", "Listen address for prometheus metrics listener") 
} +type asyncResult struct { + name string + error error +} + func runStartCmd(cmd *cobra.Command, args []string) { flag.Set("logtostderr", "true") flag.Parse() + // This is the context that signals whether the operator should be running and doing work + runContext, runCancel := context.WithCancel(context.Background()) + // This is the context that signals whether we should release our leader lease + leaderContext, leaderCancel := context.WithCancel(context.Background()) + // To help debugging, immediately log version glog.Infof("Version: %+v (%s)", version.Raw, version.Hash) @@ -55,18 +68,29 @@ func runStartCmd(cmd *cobra.Command, args []string) { if err != nil { ctrlcommon.WriteTerminationError(fmt.Errorf("creating clients: %w", err)) } - run := func(ctx context.Context) { - ctrlctx := ctrlcommon.CreateControllerContext(cb, ctx.Done(), componentName) + + resultChannel := make(chan asyncResult, 1) + resultChannelCount := 0 + + // The context that gets passed into this function by the leaderelection stuff is the leaderContext, so we don't use it. + // In the one case where we would have used it ( where we've lost a leaderelection for some reason) we really just need to insta-die + // rather than cleanly shut down. 
+ run := func(_ context.Context) { + + // Set up the signal handler + go common.SignalHandler(runCancel) + + ctrlctx := ctrlcommon.CreateControllerContext(cb, runContext.Done(), componentName) // Start the metrics handler - go ctrlcommon.StartMetricsListener(startOpts.promMetricsListenAddress, ctrlctx.Stop) + resultChannelCount++ + go func() { + defer utilruntime.HandleCrash() + err := ctrlcommon.StartMetricsListener(startOpts.promMetricsListenAddress, ctrlctx.Stop) + resultChannel <- asyncResult{name: "metrics handler", error: err} + }() controllers := createControllers(ctrlctx) - draincontroller := drain.New( - ctrlctx.KubeInformerFactory.Core().V1().Nodes(), - ctrlctx.ClientBuilder.KubeClientOrDie("node-update-controller"), - ctrlctx.ClientBuilder.MachineConfigClientOrDie("node-update-controller"), - ) // Start the shared factory informers that you need to use in your controller ctrlctx.InformerFactory.Start(ctrlctx.Stop) @@ -75,27 +99,81 @@ func runStartCmd(cmd *cobra.Command, args []string) { ctrlctx.ConfigInformerFactory.Start(ctrlctx.Stop) ctrlctx.OperatorInformerFactory.Start(ctrlctx.Stop) + // Make sure the informers have started close(ctrlctx.InformersStarted) - for _, c := range controllers { - go c.Run(2, ctrlctx.Stop) + // Start the actual controllers + for num := range controllers { + resultChannelCount++ + glog.Infof("Starting %s controller", controllers[num].Name()) + // Closure, need to make sure we don't grab the loop reference + controller := controllers[num] + go func() { + defer utilruntime.HandleCrash() + if controller.Name() == "DrainController" { + controller.Run(runContext, 5) + } else { + controller.Run(runContext, 2) + } + resultChannel <- asyncResult{name: controller.Name() + " controller", error: err} + }() } - go draincontroller.Run(5, ctrlctx.Stop) - select {} + var shutdownTimer *time.Timer + for resultChannelCount > 0 { + glog.Infof("Waiting on %d outstanding goroutines.", resultChannelCount) + if shutdownTimer == nil { // running + 
select { + case <-runContext.Done(): + glog.Info("Run context completed; beginning two-minute graceful shutdown period.") + shutdownTimer = time.NewTimer(2 * time.Minute) + + case result := <-resultChannel: + // TODO(jkyros): one of our goroutines puked early, this means we shut down everything. + resultChannelCount-- + if result.error == nil { + glog.Infof("Collected %s goroutine.", result.name) + } else { + glog.Errorf("Collected %s goroutine: %v", result.name, result.error) + runCancel() // this will cause shutdownTimer initialization in the next loop + } + } + } else { // shutting down + select { + case <-shutdownTimer.C: // never triggers after the channel is stopped, although it would not matter much if it did because subsequent cancel calls do nothing. + leaderCancel() + shutdownTimer.Stop() + case result := <-resultChannel: + resultChannelCount-- + if result.error == nil { + glog.Infof("Collected %s goroutine.", result.name) + } else { + glog.Errorf("Collected %s goroutine: %v", result.name, result.error) + } + if resultChannelCount == 0 { + glog.Info("That was the last one, cancelling the leader lease.") + + } + } + } + } + leaderCancel() + glog.Info("Finished collecting operator goroutines.") } - leaderElectionCfg := common.GetLeaderElectionConfig(cb.GetBuilderConfig()) + leaderElectionCfg := common.GetLeaderElectionConfig(runContext, cb.GetBuilderConfig()) - leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ - Lock: common.CreateResourceLock(cb, startOpts.resourceLockNamespace, componentName), - LeaseDuration: leaderElectionCfg.LeaseDuration.Duration, - RenewDeadline: leaderElectionCfg.RenewDeadline.Duration, - RetryPeriod: leaderElectionCfg.RetryPeriod.Duration, + leaderelection.RunOrDie(leaderContext, leaderelection.LeaderElectionConfig{ + Lock: common.CreateResourceLock(cb, startOpts.resourceLockNamespace, componentName), + ReleaseOnCancel: true, + LeaseDuration: leaderElectionCfg.LeaseDuration.Duration, + RenewDeadline: 
leaderElectionCfg.RenewDeadline.Duration, + RetryPeriod: leaderElectionCfg.RetryPeriod.Duration, Callbacks: leaderelection.LeaderCallbacks{ OnStartedLeading: run, OnStoppedLeading: func() { - glog.Fatalf("leaderelection lost") + glog.Infof("Stopped leading. Terminating.") + os.Exit(0) }, }, }) @@ -160,6 +238,12 @@ func createControllers(ctx *ctrlcommon.ControllerContext) []ctrlcommon.Controlle ctx.ClientBuilder.KubeClientOrDie("node-update-controller"), ctx.ClientBuilder.MachineConfigClientOrDie("node-update-controller"), ), + // The drain controller drains pods from nodes + drain.New( + ctx.KubeInformerFactory.Core().V1().Nodes(), + ctx.ClientBuilder.KubeClientOrDie("node-update-controller"), + ctx.ClientBuilder.MachineConfigClientOrDie("node-update-controller"), + ), ) return controllers From 450915448cd354e1404861d88446afcd4786720b Mon Sep 17 00:00:00 2001 From: John Kyros <79665180+jkyros@users.noreply.github.com> Date: Wed, 15 Jun 2022 13:38:27 -0500 Subject: [PATCH 18/18] Gracefully shut down operator and release lease This: - adds main/lease contexts to the operator - sets up a counter and channels to track goroutine completion - sets up a signal handler to catch when the operator is being terminated so we can cancel our contexts - gracefully shuts down the operator upon receipt of a SIGINT/SIGTERM The reason this does not use sync.WaitGroup instead is that sync.WaitGroup has no awareness of 'what' it's waiting for, just 'how many', so the channels are more useful. 
Cribbed off of what the CVO did here: https://github.com/openshift/cluster-version-operator/pull/424 --- cmd/machine-config-operator/start.go | 88 ++++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 11 deletions(-) diff --git a/cmd/machine-config-operator/start.go b/cmd/machine-config-operator/start.go index c6da99e420..76b38f4340 100644 --- a/cmd/machine-config-operator/start.go +++ b/cmd/machine-config-operator/start.go @@ -4,6 +4,7 @@ import ( "context" "flag" "os" + "time" "github.com/golang/glog" "github.com/openshift/machine-config-operator/cmd/common" @@ -12,6 +13,7 @@ import ( "github.com/openshift/machine-config-operator/pkg/operator" "github.com/openshift/machine-config-operator/pkg/version" "github.com/spf13/cobra" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/tools/leaderelection" ) @@ -35,10 +37,24 @@ func init() { startCmd.PersistentFlags().StringVar(&startOpts.imagesFile, "images-json", "", "images.json file for MCO.") } +type asyncResult struct { + name string + error error +} + func runStartCmd(cmd *cobra.Command, args []string) { flag.Set("logtostderr", "true") flag.Parse() + // This is the context that signals whether the operator should be running and doing work + runContext, runCancel := context.WithCancel(context.Background()) + // This is the context that signals whether we should release our leader lease + leaderContext, leaderCancel := context.WithCancel(context.Background()) + + // So we can collect status of our goroutines + resultChannel := make(chan asyncResult, 1) + resultChannelCount := 0 + // To help debugging, immediately log version glog.Infof("Version: %s (Raw: %s, Hash: %s)", os.Getenv("RELEASE_VERSION"), version.Raw, version.Hash) @@ -50,8 +66,11 @@ func runStartCmd(cmd *cobra.Command, args []string) { if err != nil { glog.Fatalf("error creating clients: %v", err) } - run := func(ctx context.Context) { - 
ctrlctx := ctrlcommon.CreateControllerContext(cb, ctx.Done(), ctrlcommon.MCONamespace) + run := func(_ context.Context) { + + go common.SignalHandler(runCancel) + + ctrlctx := ctrlcommon.CreateControllerContext(cb, runContext.Done(), ctrlcommon.MCONamespace) controller := operator.New( ctrlcommon.MCONamespace, componentName, startOpts.imagesFile, @@ -89,22 +108,69 @@ func runStartCmd(cmd *cobra.Command, args []string) { ctrlctx.KubeMAOSharedInformer.Start(ctrlctx.Stop) close(ctrlctx.InformersStarted) - go controller.Run(2, ctrlctx.Stop) + resultChannelCount++ + go func() { + defer utilruntime.HandleCrash() + controller.Run(runContext, 2) + resultChannel <- asyncResult{name: "main operator", error: err} + }() + + // TODO(jkyros); This might be overkill for the operator, it only has one goroutine + var shutdownTimer *time.Timer + for resultChannelCount > 0 { + glog.Infof("Waiting on %d outstanding goroutines.", resultChannelCount) + if shutdownTimer == nil { // running + select { + case <-runContext.Done(): + glog.Info("Run context completed; beginning two-minute graceful shutdown period.") + shutdownTimer = time.NewTimer(2 * time.Minute) - select {} + case result := <-resultChannel: + // TODO(jkyros): one of our goroutines puked early, this means we shut down everything. + resultChannelCount-- + if result.error == nil { + glog.Infof("Collected %s goroutine.", result.name) + } else { + glog.Errorf("Collected %s goroutine: %v", result.name, result.error) + runCancel() // this will cause shutdownTimer initialization in the next loop + } + } + } else { // shutting down + select { + case <-shutdownTimer.C: // never triggers after the channel is stopped, although it would not matter much if it did because subsequent cancel calls do nothing. 
+ leaderCancel() + shutdownTimer.Stop() + case result := <-resultChannel: + resultChannelCount-- + if result.error == nil { + glog.Infof("Collected %s goroutine.", result.name) + } else { + glog.Errorf("Collected %s goroutine: %v", result.name, result.error) + } + if resultChannelCount == 0 { + glog.Info("That was the last one, cancelling the leader lease.") + leaderCancel() + } + } + } + } + glog.Info("Finished collecting operator goroutines.") } - leaderElectionCfg := common.GetLeaderElectionConfig(cb.GetBuilderConfig()) + // TODO(jkyros): should this be a different "pre-run" context here? + leaderElectionCfg := common.GetLeaderElectionConfig(runContext, cb.GetBuilderConfig()) - leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ - Lock: common.CreateResourceLock(cb, ctrlcommon.MCONamespace, componentName), - LeaseDuration: leaderElectionCfg.LeaseDuration.Duration, - RenewDeadline: leaderElectionCfg.RenewDeadline.Duration, - RetryPeriod: leaderElectionCfg.RetryPeriod.Duration, + leaderelection.RunOrDie(leaderContext, leaderelection.LeaderElectionConfig{ + Lock: common.CreateResourceLock(cb, ctrlcommon.MCONamespace, componentName), + ReleaseOnCancel: true, + LeaseDuration: leaderElectionCfg.LeaseDuration.Duration, + RenewDeadline: leaderElectionCfg.RenewDeadline.Duration, + RetryPeriod: leaderElectionCfg.RetryPeriod.Duration, Callbacks: leaderelection.LeaderCallbacks{ OnStartedLeading: run, OnStoppedLeading: func() { - glog.Fatalf("leaderelection lost") + glog.Infof("Stopped leading. Terminating.") + os.Exit(0) }, }, })