Skip to content

Commit 2b81f47

Browse files
cvo: Release our leader lease when we are gracefully terminated
On an upgrade the CVO is always restarted but we don't release our lease, which causes the new CVO pod to have to wait. We should release our lease on exit. Kube 1.14 (pr 71490) contains a new flag on leader lease that allows the caller to step down gracefully. Because backporting that change to client-go is complicated, we instead emulate the logic. When that code is available we can simplify down. cmd: Refactor how the CVO is started so the integration test is consistent Move the integration test logic into a new package and reuse startup logic so that we have a much cleaner start command than before and so that we are testing what we run in the command. Remove rootOpts and startOpts and replace them with nested operations. Add a test that verifies we send leader election events.
1 parent 71c91fc commit 2b81f47

File tree

10 files changed

+597
-483
lines changed

10 files changed

+597
-483
lines changed

cmd/main.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,10 @@ var (
1818
Short: "Run Cluster Version Controller",
1919
Long: "",
2020
}
21-
22-
rootOpts struct {
23-
releaseImage string
24-
}
2521
)
2622

2723
func init() {
2824
rootCmd.PersistentFlags().AddGoFlagSet(flag.CommandLine)
29-
rootCmd.PersistentFlags().StringVar(&rootOpts.releaseImage, "release-image", "", "The Openshift release image url.")
3025
}
3126

3227
func main() {

cmd/render.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@ var (
1818
}
1919

2020
renderOpts struct {
21-
outputDir string
21+
releaseImage string
22+
outputDir string
2223
}
2324
)
2425

2526
func init() {
2627
rootCmd.AddCommand(renderCmd)
2728
renderCmd.PersistentFlags().StringVar(&renderOpts.outputDir, "output-dir", "", "The output directory where the manifests will be rendered.")
29+
renderCmd.PersistentFlags().StringVar(&renderOpts.releaseImage, "release-image", "", "The Openshift release image url.")
2830
}
2931

3032
func runRenderCmd(cmd *cobra.Command, args []string) {
@@ -34,10 +36,10 @@ func runRenderCmd(cmd *cobra.Command, args []string) {
3436
if renderOpts.outputDir == "" {
3537
glog.Fatalf("missing --output-dir flag, it is required")
3638
}
37-
if rootOpts.releaseImage == "" {
39+
if renderOpts.releaseImage == "" {
3840
glog.Fatalf("missing --release-image flag, it is required")
3941
}
40-
if err := cvo.Render(renderOpts.outputDir, rootOpts.releaseImage); err != nil {
42+
if err := cvo.Render(renderOpts.outputDir, renderOpts.releaseImage); err != nil {
4143
glog.Fatalf("Render command failed: %v", err)
4244
}
4345
}

cmd/start.go

Lines changed: 17 additions & 265 deletions
Original file line numberDiff line numberDiff line change
@@ -2,284 +2,36 @@ package main
22

33
import (
44
"flag"
5-
"fmt"
6-
"math/rand"
7-
"net/http"
8-
"os"
9-
"time"
105

116
"github.com/golang/glog"
12-
"github.com/google/uuid"
13-
clientset "github.com/openshift/client-go/config/clientset/versioned"
14-
informers "github.com/openshift/client-go/config/informers/externalversions"
15-
"github.com/openshift/cluster-version-operator/pkg/autoupdate"
16-
"github.com/openshift/cluster-version-operator/pkg/cvo"
7+
"github.com/openshift/cluster-version-operator/pkg/start"
178
"github.com/openshift/cluster-version-operator/pkg/version"
18-
"github.com/prometheus/client_golang/prometheus/promhttp"
199
"github.com/spf13/cobra"
20-
v1 "k8s.io/api/core/v1"
21-
apiext "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
22-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23-
"k8s.io/apimachinery/pkg/runtime"
24-
"k8s.io/client-go/kubernetes"
25-
"k8s.io/client-go/rest"
26-
"k8s.io/client-go/tools/clientcmd"
27-
"k8s.io/client-go/tools/leaderelection"
28-
"k8s.io/client-go/tools/leaderelection/resourcelock"
29-
"k8s.io/client-go/tools/record"
3010
)
3111

32-
const (
33-
minResyncPeriod = 2 * time.Minute
34-
35-
leaseDuration = 90 * time.Second
36-
renewDeadline = 45 * time.Second
37-
retryPeriod = 30 * time.Second
38-
)
39-
40-
var (
41-
startCmd = &cobra.Command{
12+
func init() {
13+
opts := start.NewOptions()
14+
cmd := &cobra.Command{
4215
Use: "start",
4316
Short: "Starts Cluster Version Operator",
4417
Long: "",
45-
Run: runStartCmd,
46-
}
47-
48-
startOpts struct {
49-
// name is provided for testing only to allow multiple CVO's to be running at once
50-
name string
51-
// namespace is provided for testing only
52-
namespace string
53-
54-
kubeconfig string
55-
nodeName string
56-
listenAddr string
57-
58-
enableAutoUpdate bool
59-
}
60-
)
61-
62-
func init() {
63-
rootCmd.AddCommand(startCmd)
64-
startCmd.PersistentFlags().StringVar(&startOpts.listenAddr, "listen", "0.0.0.0:9099", "Address to listen on for metrics")
65-
startCmd.PersistentFlags().StringVar(&startOpts.kubeconfig, "kubeconfig", "", "Kubeconfig file to access a remote cluster (testing only)")
66-
startCmd.PersistentFlags().StringVar(&startOpts.nodeName, "node-name", "", "kubernetes node name CVO is scheduled on.")
67-
startCmd.PersistentFlags().BoolVar(&startOpts.enableAutoUpdate, "enable-auto-update", true, "Enables the autoupdate controller.")
68-
}
69-
70-
func runStartCmd(cmd *cobra.Command, args []string) {
71-
flag.Set("logtostderr", "true")
72-
flag.Parse()
73-
74-
// To help debugging, immediately log version
75-
glog.Infof("%s", version.String)
76-
77-
if startOpts.nodeName == "" {
78-
name, ok := os.LookupEnv("NODE_NAME")
79-
if !ok || name == "" {
80-
glog.Fatalf("node-name is required")
81-
}
82-
startOpts.nodeName = name
83-
}
84-
85-
// exposed for end-to-end testing only
86-
startOpts.name = os.Getenv("CVO_NAME")
87-
if len(startOpts.name) == 0 {
88-
startOpts.name = componentName
89-
}
90-
startOpts.namespace = os.Getenv("CVO_NAMESPACE")
91-
if len(startOpts.name) == 0 {
92-
startOpts.namespace = componentNamespace
93-
}
18+
Run: func(cmd *cobra.Command, args []string) {
19+
flag.Set("logtostderr", "true")
20+
flag.Parse()
9421

95-
if rootOpts.releaseImage == "" {
96-
glog.Fatalf("missing --release-image flag, it is required")
97-
}
22+
// To help debugging, immediately log version
23+
glog.Infof("%s", version.String)
9824

99-
if len(startOpts.listenAddr) > 0 {
100-
mux := http.NewServeMux()
101-
mux.Handle("/metrics", promhttp.Handler())
102-
go func() {
103-
if err := http.ListenAndServe(startOpts.listenAddr, mux); err != nil {
104-
glog.Fatalf("Unable to start metrics server: %v", err)
25+
if err := opts.Run(); err != nil {
26+
glog.Fatalf("error: %v", err)
10527
}
106-
}()
107-
}
108-
109-
cb, err := newClientBuilder(startOpts.kubeconfig)
110-
if err != nil {
111-
glog.Fatalf("error creating clients: %v", err)
112-
}
113-
stopCh := make(chan struct{})
114-
run := func(stop <-chan struct{}) {
115-
116-
ctx := createControllerContext(cb, startOpts.name, stopCh)
117-
if err := startControllers(ctx); err != nil {
118-
glog.Fatalf("error starting controllers: %v", err)
119-
}
120-
121-
ctx.CVInformerFactory.Start(ctx.Stop)
122-
ctx.InformerFactory.Start(ctx.Stop)
123-
close(ctx.InformersStarted)
124-
125-
select {}
126-
}
127-
128-
leaderelection.RunOrDie(leaderelection.LeaderElectionConfig{
129-
Lock: createResourceLock(cb),
130-
LeaseDuration: leaseDuration,
131-
RenewDeadline: renewDeadline,
132-
RetryPeriod: retryPeriod,
133-
Callbacks: leaderelection.LeaderCallbacks{
134-
OnStartedLeading: run,
135-
OnStoppedLeading: func() {
136-
glog.Fatalf("leaderelection lost")
137-
},
138-
},
139-
})
140-
panic("unreachable")
141-
}
142-
143-
func createResourceLock(cb *clientBuilder) resourcelock.Interface {
144-
recorder := record.
145-
NewBroadcaster().
146-
NewRecorder(runtime.NewScheme(), v1.EventSource{Component: componentName})
147-
148-
id, err := os.Hostname()
149-
if err != nil {
150-
glog.Fatalf("error creating lock: %v", err)
151-
}
152-
153-
uuid, err := uuid.NewRandom()
154-
if err != nil {
155-
glog.Fatalf("Failed to generate UUID: %v", err)
156-
}
157-
158-
// add a uniquifier so that two processes on the same host don't accidentally both become active
159-
id = id + "_" + uuid.String()
160-
161-
return &resourcelock.ConfigMapLock{
162-
ConfigMapMeta: metav1.ObjectMeta{
163-
Namespace: componentNamespace,
164-
Name: componentName,
16528
},
166-
Client: cb.KubeClientOrDie("leader-election").CoreV1(),
167-
LockConfig: resourcelock.ResourceLockConfig{
168-
Identity: id,
169-
EventRecorder: recorder,
170-
},
171-
}
172-
}
173-
174-
func resyncPeriod() func() time.Duration {
175-
return func() time.Duration {
176-
factor := rand.Float64() + 1
177-
return time.Duration(float64(minResyncPeriod.Nanoseconds()) * factor)
178-
}
179-
}
180-
181-
type clientBuilder struct {
182-
config *rest.Config
183-
}
184-
185-
func (cb *clientBuilder) RestConfig() *rest.Config {
186-
c := rest.CopyConfig(cb.config)
187-
return c
188-
}
189-
190-
func (cb *clientBuilder) ClientOrDie(name string) clientset.Interface {
191-
return clientset.NewForConfigOrDie(rest.AddUserAgent(cb.config, name))
192-
}
193-
194-
func (cb *clientBuilder) KubeClientOrDie(name string) kubernetes.Interface {
195-
return kubernetes.NewForConfigOrDie(rest.AddUserAgent(cb.config, name))
196-
}
197-
198-
func (cb *clientBuilder) APIExtClientOrDie(name string) apiext.Interface {
199-
return apiext.NewForConfigOrDie(rest.AddUserAgent(cb.config, name))
200-
}
201-
202-
func newClientBuilder(kubeconfig string) (*clientBuilder, error) {
203-
var config *rest.Config
204-
var err error
205-
206-
if kubeconfig != "" {
207-
glog.V(4).Infof("Loading kube client config from path %q", kubeconfig)
208-
config, err = clientcmd.BuildConfigFromFlags("", kubeconfig)
209-
} else {
210-
glog.V(4).Infof("Using in-cluster kube client config")
211-
config, err = rest.InClusterConfig()
212-
}
213-
if err != nil {
214-
return nil, err
215-
}
216-
217-
return &clientBuilder{
218-
config: config,
219-
}, nil
220-
}
221-
222-
type controllerContext struct {
223-
ClientBuilder *clientBuilder
224-
225-
CVInformerFactory informers.SharedInformerFactory
226-
InformerFactory informers.SharedInformerFactory
227-
228-
Stop <-chan struct{}
229-
230-
InformersStarted chan struct{}
231-
232-
ResyncPeriod func() time.Duration
233-
}
234-
235-
func createControllerContext(cb *clientBuilder, name string, stop <-chan struct{}) *controllerContext {
236-
client := cb.ClientOrDie("shared-informer")
237-
238-
cvInformer := informers.NewFilteredSharedInformerFactory(client, resyncPeriod()(), "", func(opts *metav1.ListOptions) {
239-
opts.FieldSelector = fmt.Sprintf("metadata.name=%s", name)
240-
})
241-
sharedInformers := informers.NewSharedInformerFactory(client, resyncPeriod()())
242-
243-
return &controllerContext{
244-
ClientBuilder: cb,
245-
CVInformerFactory: cvInformer,
246-
InformerFactory: sharedInformers,
247-
Stop: stop,
248-
InformersStarted: make(chan struct{}),
249-
ResyncPeriod: resyncPeriod(),
250-
}
251-
}
252-
253-
func startControllers(ctx *controllerContext) error {
254-
overrideDirectory := os.Getenv("PAYLOAD_OVERRIDE")
255-
if len(overrideDirectory) > 0 {
256-
glog.Warningf("Using an override payload directory for testing only: %s", overrideDirectory)
257-
}
258-
259-
go cvo.New(
260-
startOpts.nodeName,
261-
startOpts.namespace, startOpts.name,
262-
rootOpts.releaseImage,
263-
overrideDirectory,
264-
ctx.ResyncPeriod(),
265-
ctx.CVInformerFactory.Config().V1().ClusterVersions(),
266-
ctx.InformerFactory.Config().V1().ClusterOperators(),
267-
ctx.ClientBuilder.RestConfig(),
268-
ctx.ClientBuilder.ClientOrDie(componentName),
269-
ctx.ClientBuilder.KubeClientOrDie(componentName),
270-
ctx.ClientBuilder.APIExtClientOrDie(componentName),
271-
true,
272-
).Run(2, ctx.Stop)
273-
274-
if startOpts.enableAutoUpdate {
275-
go autoupdate.New(
276-
componentNamespace, componentName,
277-
ctx.CVInformerFactory.Config().V1().ClusterVersions(),
278-
ctx.InformerFactory.Config().V1().ClusterOperators(),
279-
ctx.ClientBuilder.ClientOrDie(componentName),
280-
ctx.ClientBuilder.KubeClientOrDie(componentName),
281-
).Run(2, ctx.Stop)
28229
}
28330

284-
return nil
31+
cmd.PersistentFlags().StringVar(&opts.ListenAddr, "listen", opts.ListenAddr, "Address to listen on for metrics")
32+
cmd.PersistentFlags().StringVar(&opts.Kubeconfig, "kubeconfig", opts.Kubeconfig, "Kubeconfig file to access a remote cluster (testing only)")
33+
cmd.PersistentFlags().StringVar(&opts.NodeName, "node-name", opts.NodeName, "kubernetes node name CVO is scheduled on.")
34+
cmd.PersistentFlags().BoolVar(&opts.EnableAutoUpdate, "enable-auto-update", opts.EnableAutoUpdate, "Enables the autoupdate controller.")
35+
cmd.PersistentFlags().StringVar(&opts.ReleaseImage, "release-image", opts.ReleaseImage, "The Openshift release image url.")
36+
rootCmd.AddCommand(cmd)
28537
}

pkg/autoupdate/autoupdate.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
"github.com/blang/semver"
99

1010
"github.com/golang/glog"
11-
"github.com/openshift/api/config/v1"
11+
v1 "github.com/openshift/api/config/v1"
1212
clientset "github.com/openshift/client-go/config/clientset/versioned"
1313
"github.com/openshift/client-go/config/clientset/versioned/scheme"
1414
configinformersv1 "github.com/openshift/client-go/config/informers/externalversions/config/v1"
@@ -65,7 +65,7 @@ func New(
6565
) *Controller {
6666
eventBroadcaster := record.NewBroadcaster()
6767
eventBroadcaster.StartLogging(glog.Infof)
68-
eventBroadcaster.StartRecordingToSink(&coreclientsetv1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
68+
eventBroadcaster.StartRecordingToSink(&coreclientsetv1.EventSinkImpl{Interface: kubeClient.CoreV1().Events(namespace)})
6969

7070
ctrl := &Controller{
7171
namespace: namespace,

pkg/cvo/cvo.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ func New(
137137
) *Operator {
138138
eventBroadcaster := record.NewBroadcaster()
139139
eventBroadcaster.StartLogging(glog.Infof)
140-
eventBroadcaster.StartRecordingToSink(&coreclientsetv1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
140+
eventBroadcaster.StartRecordingToSink(&coreclientsetv1.EventSinkImpl{Interface: kubeClient.CoreV1().Events(namespace)})
141141

142142
optr := &Operator{
143143
nodename: nodename,
@@ -162,7 +162,7 @@ func New(
162162

163163
optr.configSync = NewSyncWorker(
164164
optr.defaultPayloadRetriever(),
165-
optr.defaultResourceBuilder(),
165+
NewResourceBuilder(optr.restConfig),
166166
minimumInterval,
167167
wait.Backoff{
168168
Duration: time.Second * 10,
@@ -452,3 +452,8 @@ func (optr *Operator) currentVersion() configv1.Update {
452452
Payload: optr.releaseImage,
453453
}
454454
}
455+
456+
// SetSyncWorkerForTesting updates the sync worker for whitebox testing.
457+
func (optr *Operator) SetSyncWorkerForTesting(worker ConfigSyncWorker) {
458+
optr.configSync = worker
459+
}

0 commit comments

Comments
 (0)