@@ -117,6 +117,10 @@ type controllerManager struct {
117117 // it must be deferred until after gracefulShutdown is done.
118118 leaderElectionCancel context.CancelFunc
119119
120+ // leaderElectionStopped is an internal channel used to signal the stopping procedure that the
121+ // LeaderElection.Run(...) function has returned and the shutdown can proceed.
122+ leaderElectionStopped chan struct {}
123+
120124 // stop procedure engaged. In other words, we should not add anything else to the manager
121125 stopProcedureEngaged bool
122126
@@ -545,11 +549,16 @@ func (cm *controllerManager) engageStopProcedure(stopComplete <-chan struct{}) e
545549
546550// waitForRunnableToEnd blocks until all runnables have ended or the
547551// tearDownTimeout is reached. In the latter case, an error is returned.
548- func (cm * controllerManager ) waitForRunnableToEnd (shutdownCancel context.CancelFunc ) error {
552+ func (cm * controllerManager ) waitForRunnableToEnd (shutdownCancel context.CancelFunc ) ( retErr error ) {
549553 // Cancel leader election only after we waited. It will os.Exit() the app for safety.
550554 defer func () {
551- if cm .leaderElectionCancel != nil {
555+ if retErr == nil && cm .leaderElectionCancel != nil {
556+ // After asking the context to be cancelled, make sure
557+ // we wait for the leader stopped channel to be closed, otherwise
558+ // we might encounter race conditions between this code
559+ // and the event recorder, which is used within leader election code.
552560 cm .leaderElectionCancel ()
561+ <- cm .leaderElectionStopped
553562 }
554563 }()
555564
@@ -652,7 +661,11 @@ func (cm *controllerManager) startLeaderElection() (err error) {
652661 }
653662
654663 // Start the leader elector process
655- go l .Run (ctx )
664+ go func () {
665+ l .Run (ctx )
666+ <- ctx .Done ()
667+ close (cm .leaderElectionStopped )
668+ }()
656669 return nil
657670}
658671
0 commit comments