Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions cmd/common/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package common

import (
"os"
"os/signal"
"syscall"

"github.com/golang/glog"
"github.com/openshift/machine-config-operator/internal/clients"
Expand Down Expand Up @@ -69,3 +71,23 @@ func GetLeaderElectionConfig(restcfg *rest.Config) configv1.LeaderElection {

return defaultLeaderElection
}

// SignalHandler blocks until the process receives SIGINT or SIGTERM and then
// calls runCancel, so that anything tied to the corresponding context can shut
// down cleanly. It then keeps listening: a second SIGINT/SIGTERM is logged via
// glog.Fatalf. Because it blocks, callers are expected to run it in its own
// goroutine.
func SignalHandler(runCancel context.CancelFunc) {
	// Buffer of one: the signal package does not block when delivering, so an
	// unbuffered channel could miss a signal that arrives before we receive.
	signals := make(chan os.Signal, 1)

	// Unregister the channel if this function ever returns.
	defer signal.Stop(signals)

	// Subscribe to SIGINT (os.Interrupt) and SIGTERM only.
	signal.Notify(signals, os.Interrupt, syscall.SIGTERM)

	first := <-signals
	glog.Infof("Shutting down due to: %s", first)

	// Cancelling is the "stop everything" notification for the rest of the program.
	runCancel()
	glog.Infof("Context cancelled")

	// A repeated shutdown signal is treated as fatal.
	second := <-signals
	glog.Fatalf("Received shutdown signal twice, exiting: %s", second)
}
24 changes: 17 additions & 7 deletions cmd/machine-config-controller/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"flag"
"fmt"
"os"

"github.com/golang/glog"
"github.com/openshift/machine-config-operator/cmd/common"
Expand Down Expand Up @@ -48,6 +49,10 @@ func runStartCmd(cmd *cobra.Command, args []string) {
flag.Set("logtostderr", "true")
flag.Parse()

// This is 'main' context that we thread through the controller context and
// the leader elections. Cancelling this is "stop everything, we are shutting down".
runContext, runCancel := context.WithCancel(context.Background())

// To help debugging, immediately log version
glog.Infof("Version: %+v (%s)", version.Raw, version.Hash)

Expand All @@ -56,6 +61,8 @@ func runStartCmd(cmd *cobra.Command, args []string) {
ctrlcommon.WriteTerminationError(fmt.Errorf("creating clients: %w", err))
}
run := func(ctx context.Context) {
go common.SignalHandler(runCancel)

ctrlctx := ctrlcommon.CreateControllerContext(cb, ctx.Done(), componentName)

// Start the metrics handler
Expand All @@ -82,20 +89,23 @@ func runStartCmd(cmd *cobra.Command, args []string) {
}
go draincontroller.Run(5, ctrlctx.Stop)

select {}
// wait here in this function until the context gets cancelled (which tells us we are being shut down)
<-ctx.Done()
}

leaderElectionCfg := common.GetLeaderElectionConfig(cb.GetBuilderConfig())

leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{
Lock: common.CreateResourceLock(cb, startOpts.resourceLockNamespace, componentName),
LeaseDuration: leaderElectionCfg.LeaseDuration.Duration,
RenewDeadline: leaderElectionCfg.RenewDeadline.Duration,
RetryPeriod: leaderElectionCfg.RetryPeriod.Duration,
leaderelection.RunOrDie(runContext, leaderelection.LeaderElectionConfig{
Lock: common.CreateResourceLock(cb, startOpts.resourceLockNamespace, componentName),
ReleaseOnCancel: true,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I correct in understanding that, essentially, this release is the crux of the PR here? Previously, we took the leader election (lock?) and never released it until the timeout happened?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are correct.

LeaseDuration: leaderElectionCfg.LeaseDuration.Duration,
RenewDeadline: leaderElectionCfg.RenewDeadline.Duration,
RetryPeriod: leaderElectionCfg.RetryPeriod.Duration,
Callbacks: leaderelection.LeaderCallbacks{
OnStartedLeading: run,
OnStoppedLeading: func() {
glog.Fatalf("leaderelection lost")
glog.Infof("Stopped leading. Terminating.")
os.Exit(0)
},
},
})
Expand Down
23 changes: 16 additions & 7 deletions cmd/machine-config-operator/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ func runStartCmd(cmd *cobra.Command, args []string) {
flag.Set("logtostderr", "true")
flag.Parse()

// This is 'main' context that we thread through the controller context and
// the leader elections. Cancelling this is "stop everything, we are shutting down".
runContext, runCancel := context.WithCancel(context.Background())

// To help debugging, immediately log version
glog.Infof("Version: %s (Raw: %s, Hash: %s)", os.Getenv("RELEASE_VERSION"), version.Raw, version.Hash)

Expand All @@ -51,6 +55,8 @@ func runStartCmd(cmd *cobra.Command, args []string) {
glog.Fatalf("error creating clients: %v", err)
}
run := func(ctx context.Context) {
go common.SignalHandler(runCancel)

ctrlctx := ctrlcommon.CreateControllerContext(cb, ctx.Done(), ctrlcommon.MCONamespace)
controller := operator.New(
ctrlcommon.MCONamespace, componentName,
Expand Down Expand Up @@ -91,20 +97,23 @@ func runStartCmd(cmd *cobra.Command, args []string) {

go controller.Run(2, ctrlctx.Stop)

select {}
// wait here in this function until the context gets cancelled (which tells us we are being shut down)
<-ctx.Done()
}

leaderElectionCfg := common.GetLeaderElectionConfig(cb.GetBuilderConfig())

leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{
Lock: common.CreateResourceLock(cb, ctrlcommon.MCONamespace, componentName),
LeaseDuration: leaderElectionCfg.LeaseDuration.Duration,
RenewDeadline: leaderElectionCfg.RenewDeadline.Duration,
RetryPeriod: leaderElectionCfg.RetryPeriod.Duration,
leaderelection.RunOrDie(runContext, leaderelection.LeaderElectionConfig{
Lock: common.CreateResourceLock(cb, ctrlcommon.MCONamespace, componentName),
ReleaseOnCancel: true,
LeaseDuration: leaderElectionCfg.LeaseDuration.Duration,
RenewDeadline: leaderElectionCfg.RenewDeadline.Duration,
RetryPeriod: leaderElectionCfg.RetryPeriod.Duration,
Callbacks: leaderelection.LeaderCallbacks{
OnStartedLeading: run,
OnStoppedLeading: func() {
glog.Fatalf("leaderelection lost")
glog.Info("Stopped leading. Terminating.")
os.Exit(0)
},
},
})
Expand Down
9 changes: 7 additions & 2 deletions pkg/controller/common/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,12 @@ func StartMetricsListener(addr string, stopCh <-chan struct{}) {
}
}()
<-stopCh
if err := s.Shutdown(context.Background()); err != http.ErrServerClosed {
glog.Errorf("error stopping metrics listener: %v", err)
if err := s.Shutdown(context.Background()); err != nil {
if err != http.ErrServerClosed {
glog.Errorf("error stopping metrics listener: %v", err)
}
} else {
glog.Infof("Metrics listener successfully stopped")
}

}