diff --git a/cmd/common/helpers.go b/cmd/common/helpers.go index c129e6a721..fc77c49bf4 100644 --- a/cmd/common/helpers.go +++ b/cmd/common/helpers.go @@ -2,6 +2,9 @@ package common import ( + "context" "os" + "os/signal" + "syscall" "github.com/golang/glog" "github.com/openshift/machine-config-operator/internal/clients" @@ -69,3 +72,23 @@ func GetLeaderElectionConfig(restcfg *rest.Config) configv1.LeaderElection { return defaultLeaderElection } + +// SignalHandler blocks until SIGINT or SIGTERM arrives and then calls runCancel, so that work tied to the cancelled context can shut down cleanly. +// A second signal received after that exits the process immediately via glog.Fatalf. +func SignalHandler(runCancel context.CancelFunc) { + + // make a signal handling channel for os signals + ch := make(chan os.Signal, 1) + // stop listening for signals when we leave this function + defer func() { signal.Stop(ch) }() + // catch SIGINT and SIGTERM + signal.Notify(ch, os.Interrupt, syscall.SIGTERM) + sig := <-ch + glog.Infof("Shutting down due to: %s", sig) + // if we're shutting down, cancel the context so everything else will stop + runCancel() + glog.Infof("Context cancelled") + sig = <-ch + glog.Fatalf("Received shutdown signal twice, exiting: %s", sig) + +} diff --git a/cmd/machine-config-controller/start.go b/cmd/machine-config-controller/start.go index d43e1dac09..b437ae66ea 100644 --- a/cmd/machine-config-controller/start.go +++ b/cmd/machine-config-controller/start.go @@ -4,6 +4,7 @@ import ( "context" "flag" "fmt" + "os" "github.com/golang/glog" "github.com/openshift/machine-config-operator/cmd/common" @@ -48,6 +49,10 @@ func runStartCmd(cmd *cobra.Command, args []string) { flag.Set("logtostderr", "true") flag.Parse() + // This is the 'main' context that we thread through the controller context and + // the leader elections. Cancelling this is "stop everything, we are shutting down". 
+ runContext, runCancel := context.WithCancel(context.Background()) + // To help debugging, immediately log version glog.Infof("Version: %+v (%s)", version.Raw, version.Hash) @@ -56,6 +61,8 @@ func runStartCmd(cmd *cobra.Command, args []string) { ctrlcommon.WriteTerminationError(fmt.Errorf("creating clients: %w", err)) } run := func(ctx context.Context) { + go common.SignalHandler(runCancel) + ctrlctx := ctrlcommon.CreateControllerContext(cb, ctx.Done(), componentName) // Start the metrics handler @@ -82,20 +89,23 @@ func runStartCmd(cmd *cobra.Command, args []string) { } go draincontroller.Run(5, ctrlctx.Stop) - select {} + // wait here in this function until the context gets cancelled (which tells us we are being shut down) + <-ctx.Done() } leaderElectionCfg := common.GetLeaderElectionConfig(cb.GetBuilderConfig()) - leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ - Lock: common.CreateResourceLock(cb, startOpts.resourceLockNamespace, componentName), - LeaseDuration: leaderElectionCfg.LeaseDuration.Duration, - RenewDeadline: leaderElectionCfg.RenewDeadline.Duration, - RetryPeriod: leaderElectionCfg.RetryPeriod.Duration, + leaderelection.RunOrDie(runContext, leaderelection.LeaderElectionConfig{ + Lock: common.CreateResourceLock(cb, startOpts.resourceLockNamespace, componentName), + ReleaseOnCancel: true, + LeaseDuration: leaderElectionCfg.LeaseDuration.Duration, + RenewDeadline: leaderElectionCfg.RenewDeadline.Duration, + RetryPeriod: leaderElectionCfg.RetryPeriod.Duration, Callbacks: leaderelection.LeaderCallbacks{ OnStartedLeading: run, OnStoppedLeading: func() { - glog.Fatalf("leaderelection lost") + glog.Infof("Stopped leading. 
Terminating.") + os.Exit(0) }, }, }) diff --git a/cmd/machine-config-operator/start.go b/cmd/machine-config-operator/start.go index c6da99e420..46857b86c5 100644 --- a/cmd/machine-config-operator/start.go +++ b/cmd/machine-config-operator/start.go @@ -39,6 +39,10 @@ func runStartCmd(cmd *cobra.Command, args []string) { flag.Set("logtostderr", "true") flag.Parse() + // This is the 'main' context that we thread through the controller context and + // the leader elections. Cancelling this is "stop everything, we are shutting down". + runContext, runCancel := context.WithCancel(context.Background()) + // To help debugging, immediately log version glog.Infof("Version: %s (Raw: %s, Hash: %s)", os.Getenv("RELEASE_VERSION"), version.Raw, version.Hash) @@ -51,6 +55,8 @@ func runStartCmd(cmd *cobra.Command, args []string) { glog.Fatalf("error creating clients: %v", err) } run := func(ctx context.Context) { + go common.SignalHandler(runCancel) + ctrlctx := ctrlcommon.CreateControllerContext(cb, ctx.Done(), ctrlcommon.MCONamespace) controller := operator.New( ctrlcommon.MCONamespace, componentName, @@ -91,20 +97,23 @@ func runStartCmd(cmd *cobra.Command, args []string) { go controller.Run(2, ctrlctx.Stop) - select {} + // wait here in this function until the context gets cancelled (which tells us we are being shut down) + <-ctx.Done() } leaderElectionCfg := common.GetLeaderElectionConfig(cb.GetBuilderConfig()) - leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ - Lock: common.CreateResourceLock(cb, ctrlcommon.MCONamespace, componentName), - LeaseDuration: leaderElectionCfg.LeaseDuration.Duration, - RenewDeadline: leaderElectionCfg.RenewDeadline.Duration, - RetryPeriod: leaderElectionCfg.RetryPeriod.Duration, + leaderelection.RunOrDie(runContext, leaderelection.LeaderElectionConfig{ + Lock: common.CreateResourceLock(cb, ctrlcommon.MCONamespace, componentName), + ReleaseOnCancel: true, + LeaseDuration: leaderElectionCfg.LeaseDuration.Duration, + 
RenewDeadline: leaderElectionCfg.RenewDeadline.Duration, + RetryPeriod: leaderElectionCfg.RetryPeriod.Duration, Callbacks: leaderelection.LeaderCallbacks{ OnStartedLeading: run, OnStoppedLeading: func() { - glog.Fatalf("leaderelection lost") + glog.Info("Stopped leading. Terminating.") + os.Exit(0) }, }, }) diff --git a/pkg/controller/common/metrics.go b/pkg/controller/common/metrics.go index c09b6ccba9..d3bf9049da 100644 --- a/pkg/controller/common/metrics.go +++ b/pkg/controller/common/metrics.go @@ -62,7 +62,12 @@ func StartMetricsListener(addr string, stopCh <-chan struct{}) { } }() <-stopCh - if err := s.Shutdown(context.Background()); err != http.ErrServerClosed { - glog.Errorf("error stopping metrics listener: %v", err) + if err := s.Shutdown(context.Background()); err != nil { + if err != http.ErrServerClosed { + glog.Errorf("error stopping metrics listener: %v", err) + } + } else { + glog.Infof("Metrics listener successfully stopped") } + }