Skip to content

Commit 2c890a7

Browse files
committed
pkg/cvo/sync_worker: Consolidate all ClusterOperator errors by reason
newClusterOperatorsNotAvailable is from c2ac20f (status: Report the operators that have not yet deployed, 2019-04-09, openshift#158). And the not-available filtering is from bdd4545 (status: Hide generic operator status in favor of more specific errors, 2019-05-19, openshift#192). But in ce1eda1 (pkg/cvo/internal/operatorstatus: Change nested message, 2021-02-04, openshift#514), we moved "waiting on status.versions" from ClusterOperatorNotAvailable to ClusterOperatorUpdating. And we want to avoid uncollapsed errors like: Multiple errors are preventing progress: * Cluster operator machine-api is updating versions * Cluster operator openshift-apiserver is updating versions where we are waiting on multiple ClusterOperator which are in similar situations. This commit drops the filtering, because cluster operators are important. It does sort those errors to the end of the list though, so the first error is the non-ClusterOperator error.
1 parent a4e3a77 commit 2c890a7

File tree

6 files changed

+386
-134
lines changed

6 files changed

+386
-134
lines changed

pkg/cvo/internal/operatorstatus.go

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,12 @@ func (b *clusterOperatorBuilder) Do(ctx context.Context) error {
126126
func checkOperatorHealth(ctx context.Context, client ClusterOperatorsGetter, expected *configv1.ClusterOperator, mode resourcebuilder.Mode) error {
127127
if len(expected.Status.Versions) == 0 {
128128
return &payload.UpdateError{
129-
UpdateEffect: payload.UpdateEffectFail,
130-
Reason: "ClusterOperatorNoVersions",
131-
Message: fmt.Sprintf("Cluster operator %s does not declare expected versions", expected.Name),
132-
Name: expected.Name,
129+
UpdateEffect: payload.UpdateEffectFail,
130+
Reason: "ClusterOperatorNoVersions",
131+
PluralReason: "ClusterOperatorsNoVersions",
132+
Message: fmt.Sprintf("Cluster operator %s does not declare expected versions", expected.Name),
133+
PluralMessageFormat: "Cluster operators %s do not declare expected versions",
134+
Name: expected.Name,
133135
}
134136
}
135137

@@ -189,11 +191,13 @@ func checkOperatorHealth(ctx context.Context, client ClusterOperatorsGetter, exp
189191
nestedMessage = fmt.Errorf("cluster operator %s is %s=%s: %s: %s", actual.Name, availableCondition.Type, availableCondition.Status, availableCondition.Reason, availableCondition.Message)
190192
}
191193
return &payload.UpdateError{
192-
Nested: nestedMessage,
193-
UpdateEffect: payload.UpdateEffectFail,
194-
Reason: "ClusterOperatorNotAvailable",
195-
Message: fmt.Sprintf("Cluster operator %s is not available", actual.Name),
196-
Name: actual.Name,
194+
Nested: nestedMessage,
195+
UpdateEffect: payload.UpdateEffectFail,
196+
Reason: "ClusterOperatorNotAvailable",
197+
PluralReason: "ClusterOperatorsNotAvailable",
198+
Message: fmt.Sprintf("Cluster operator %s is not available", actual.Name),
199+
PluralMessageFormat: "Cluster operators %s are not available",
200+
Name: actual.Name,
197201
}
198202
}
199203

@@ -203,23 +207,27 @@ func checkOperatorHealth(ctx context.Context, client ClusterOperatorsGetter, exp
203207
nestedMessage = fmt.Errorf("cluster operator %s is %s=%s: %s, %s", actual.Name, degradedCondition.Type, degradedCondition.Status, degradedCondition.Reason, degradedCondition.Message)
204208
}
205209
return &payload.UpdateError{
206-
Nested: nestedMessage,
207-
UpdateEffect: payload.UpdateEffectFailAfterInterval,
208-
Reason: "ClusterOperatorDegraded",
209-
Message: fmt.Sprintf("Cluster operator %s is degraded", actual.Name),
210-
Name: actual.Name,
210+
Nested: nestedMessage,
211+
UpdateEffect: payload.UpdateEffectFailAfterInterval,
212+
Reason: "ClusterOperatorDegraded",
213+
PluralReason: "ClusterOperatorsDegraded",
214+
Message: fmt.Sprintf("Cluster operator %s is degraded", actual.Name),
215+
PluralMessageFormat: "Cluster operators %s are degraded",
216+
Name: actual.Name,
211217
}
212218
}
213219

214220
// during initialization we allow undone versions
215221
if len(undone) > 0 && mode != resourcebuilder.InitializingMode {
216222
nestedMessage = fmt.Errorf("cluster operator %s is available and not degraded but has not finished updating to target version", actual.Name)
217223
return &payload.UpdateError{
218-
Nested: nestedMessage,
219-
UpdateEffect: payload.UpdateEffectNone,
220-
Reason: "ClusterOperatorUpdating",
221-
Message: fmt.Sprintf("Cluster operator %s is updating versions", actual.Name),
222-
Name: actual.Name,
224+
Nested: nestedMessage,
225+
UpdateEffect: payload.UpdateEffectNone,
226+
Reason: "ClusterOperatorUpdating",
227+
PluralReason: "ClusterOperatorsUpdating",
228+
Message: fmt.Sprintf("Cluster operator %s is updating versions", actual.Name),
229+
PluralMessageFormat: "Cluster operators %s are updating versions",
230+
Name: actual.Name,
223231
}
224232
}
225233

pkg/cvo/internal/operatorstatus_test.go

Lines changed: 91 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,13 @@ func Test_checkOperatorHealth(t *testing.T) {
6363
},
6464
},
6565
expErr: &payload.UpdateError{
66-
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
67-
UpdateEffect: payload.UpdateEffectNone,
68-
Reason: "ClusterOperatorUpdating",
69-
Message: "Cluster operator test-co is updating versions",
70-
Name: "test-co",
66+
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
67+
UpdateEffect: payload.UpdateEffectNone,
68+
Reason: "ClusterOperatorUpdating",
69+
PluralReason: "ClusterOperatorsUpdating",
70+
Message: "Cluster operator test-co is updating versions",
71+
PluralMessageFormat: "Cluster operators %s are updating versions",
72+
Name: "test-co",
7173
},
7274
}, {
7375
name: "cluster operator reporting available=true, degraded=false, but no versions, while expecting an operator version",
@@ -91,11 +93,13 @@ func Test_checkOperatorHealth(t *testing.T) {
9193
},
9294
},
9395
expErr: &payload.UpdateError{
94-
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
95-
UpdateEffect: payload.UpdateEffectNone,
96-
Reason: "ClusterOperatorUpdating",
97-
Message: "Cluster operator test-co is updating versions",
98-
Name: "test-co",
96+
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
97+
UpdateEffect: payload.UpdateEffectNone,
98+
Reason: "ClusterOperatorUpdating",
99+
PluralReason: "ClusterOperatorsUpdating",
100+
Message: "Cluster operator test-co is updating versions",
101+
PluralMessageFormat: "Cluster operators %s are updating versions",
102+
Name: "test-co",
99103
},
100104
}, {
101105
name: "cluster operator reporting available=true and degraded=false, but no versions for operand",
@@ -122,11 +126,13 @@ func Test_checkOperatorHealth(t *testing.T) {
122126
},
123127
},
124128
expErr: &payload.UpdateError{
125-
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
126-
UpdateEffect: payload.UpdateEffectNone,
127-
Reason: "ClusterOperatorUpdating",
128-
Message: "Cluster operator test-co is updating versions",
129-
Name: "test-co",
129+
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
130+
UpdateEffect: payload.UpdateEffectNone,
131+
Reason: "ClusterOperatorUpdating",
132+
PluralReason: "ClusterOperatorsUpdating",
133+
Message: "Cluster operator test-co is updating versions",
134+
PluralMessageFormat: "Cluster operators %s are updating versions",
135+
Name: "test-co",
130136
},
131137
}, {
132138
name: "cluster operator reporting available=true and degraded=false, but old versions",
@@ -155,11 +161,13 @@ func Test_checkOperatorHealth(t *testing.T) {
155161
},
156162
},
157163
expErr: &payload.UpdateError{
158-
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
159-
UpdateEffect: payload.UpdateEffectNone,
160-
Reason: "ClusterOperatorUpdating",
161-
Message: "Cluster operator test-co is updating versions",
162-
Name: "test-co",
164+
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
165+
UpdateEffect: payload.UpdateEffectNone,
166+
Reason: "ClusterOperatorUpdating",
167+
PluralReason: "ClusterOperatorsUpdating",
168+
Message: "Cluster operator test-co is updating versions",
169+
PluralMessageFormat: "Cluster operators %s are updating versions",
170+
Name: "test-co",
163171
},
164172
}, {
165173
name: "cluster operator reporting available=true and degraded=false, but mix of desired and old versions",
@@ -188,11 +196,13 @@ func Test_checkOperatorHealth(t *testing.T) {
188196
},
189197
},
190198
expErr: &payload.UpdateError{
191-
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
192-
UpdateEffect: payload.UpdateEffectNone,
193-
Reason: "ClusterOperatorUpdating",
194-
Message: "Cluster operator test-co is updating versions",
195-
Name: "test-co",
199+
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
200+
UpdateEffect: payload.UpdateEffectNone,
201+
Reason: "ClusterOperatorUpdating",
202+
PluralReason: "ClusterOperatorsUpdating",
203+
Message: "Cluster operator test-co is updating versions",
204+
PluralMessageFormat: "Cluster operators %s are updating versions",
205+
Name: "test-co",
196206
},
197207
}, {
198208
name: "cluster operator reporting available=true, degraded=false, and desired operator, but old versions for 2 operands",
@@ -225,11 +235,13 @@ func Test_checkOperatorHealth(t *testing.T) {
225235
},
226236
},
227237
expErr: &payload.UpdateError{
228-
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
229-
UpdateEffect: payload.UpdateEffectNone,
230-
Reason: "ClusterOperatorUpdating",
231-
Message: "Cluster operator test-co is updating versions",
232-
Name: "test-co",
238+
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
239+
UpdateEffect: payload.UpdateEffectNone,
240+
Reason: "ClusterOperatorUpdating",
241+
PluralReason: "ClusterOperatorsUpdating",
242+
Message: "Cluster operator test-co is updating versions",
243+
PluralMessageFormat: "Cluster operators %s are updating versions",
244+
Name: "test-co",
233245
},
234246
}, {
235247
name: "cluster operator reporting available=true, degraded=false, and desired operator, but mix of old and desired versions for 2 operands",
@@ -262,11 +274,13 @@ func Test_checkOperatorHealth(t *testing.T) {
262274
},
263275
},
264276
expErr: &payload.UpdateError{
265-
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
266-
UpdateEffect: payload.UpdateEffectNone,
267-
Reason: "ClusterOperatorUpdating",
268-
Message: "Cluster operator test-co is updating versions",
269-
Name: "test-co",
277+
Nested: fmt.Errorf("cluster operator test-co is available and not degraded but has not finished updating to target version"),
278+
UpdateEffect: payload.UpdateEffectNone,
279+
Reason: "ClusterOperatorUpdating",
280+
PluralReason: "ClusterOperatorsUpdating",
281+
Message: "Cluster operator test-co is updating versions",
282+
PluralMessageFormat: "Cluster operators %s are updating versions",
283+
Name: "test-co",
270284
},
271285
}, {
272286
name: "cluster operator reporting desired versions and no conditions",
@@ -291,11 +305,13 @@ func Test_checkOperatorHealth(t *testing.T) {
291305
},
292306
},
293307
expErr: &payload.UpdateError{
294-
Nested: fmt.Errorf("cluster operator test-co: available=false, progressing=true, degraded=true, undone="),
295-
UpdateEffect: payload.UpdateEffectFail,
296-
Reason: "ClusterOperatorNotAvailable",
297-
Message: "Cluster operator test-co is not available",
298-
Name: "test-co",
308+
Nested: fmt.Errorf("cluster operator test-co: available=false, progressing=true, degraded=true, undone="),
309+
UpdateEffect: payload.UpdateEffectFail,
310+
Reason: "ClusterOperatorNotAvailable",
311+
PluralReason: "ClusterOperatorsNotAvailable",
312+
Message: "Cluster operator test-co is not available",
313+
PluralMessageFormat: "Cluster operators %s are not available",
314+
Name: "test-co",
299315
},
300316
}, {
301317
name: "cluster operator reporting progressing=true",
@@ -321,11 +337,13 @@ func Test_checkOperatorHealth(t *testing.T) {
321337
},
322338
},
323339
expErr: &payload.UpdateError{
324-
Nested: fmt.Errorf("cluster operator test-co: available=false, progressing=true, degraded=true, undone="),
325-
UpdateEffect: payload.UpdateEffectFail,
326-
Reason: "ClusterOperatorNotAvailable",
327-
Message: "Cluster operator test-co is not available",
328-
Name: "test-co",
340+
Nested: fmt.Errorf("cluster operator test-co: available=false, progressing=true, degraded=true, undone="),
341+
UpdateEffect: payload.UpdateEffectFail,
342+
Reason: "ClusterOperatorNotAvailable",
343+
PluralReason: "ClusterOperatorsNotAvailable",
344+
Message: "Cluster operator test-co is not available",
345+
PluralMessageFormat: "Cluster operators %s are not available",
346+
Name: "test-co",
329347
},
330348
}, {
331349
name: "cluster operator reporting available=false degraded=true",
@@ -351,11 +369,13 @@ func Test_checkOperatorHealth(t *testing.T) {
351369
},
352370
},
353371
expErr: &payload.UpdateError{
354-
Nested: fmt.Errorf("cluster operator test-co: available=false, progressing=true, degraded=true, undone="),
355-
UpdateEffect: payload.UpdateEffectFail,
356-
Reason: "ClusterOperatorNotAvailable",
357-
Message: "Cluster operator test-co is not available",
358-
Name: "test-co",
372+
Nested: fmt.Errorf("cluster operator test-co: available=false, progressing=true, degraded=true, undone="),
373+
UpdateEffect: payload.UpdateEffectFail,
374+
Reason: "ClusterOperatorNotAvailable",
375+
PluralReason: "ClusterOperatorsNotAvailable",
376+
Message: "Cluster operator test-co is not available",
377+
PluralMessageFormat: "Cluster operators %s are not available",
378+
Name: "test-co",
359379
},
360380
}, {
361381
name: "cluster operator reporting available=true, degraded=false, and progressing=true",
@@ -411,11 +431,13 @@ func Test_checkOperatorHealth(t *testing.T) {
411431
},
412432
},
413433
expErr: &payload.UpdateError{
414-
Nested: fmt.Errorf("cluster operator test-co is Degraded=True: RandomReason, random error"),
415-
UpdateEffect: payload.UpdateEffectFailAfterInterval,
416-
Reason: "ClusterOperatorDegraded",
417-
Message: "Cluster operator test-co is degraded",
418-
Name: "test-co",
434+
Nested: fmt.Errorf("cluster operator test-co is Degraded=True: RandomReason, random error"),
435+
UpdateEffect: payload.UpdateEffectFailAfterInterval,
436+
Reason: "ClusterOperatorDegraded",
437+
PluralReason: "ClusterOperatorsDegraded",
438+
Message: "Cluster operator test-co is degraded",
439+
PluralMessageFormat: "Cluster operators %s are degraded",
440+
Name: "test-co",
419441
},
420442
}, {
421443
name: "cluster operator reporting available=true progressing=true degraded=true",
@@ -445,11 +467,13 @@ func Test_checkOperatorHealth(t *testing.T) {
445467
},
446468
},
447469
expErr: &payload.UpdateError{
448-
Nested: fmt.Errorf("cluster operator test-co is Degraded=True: RandomReason, random error"),
449-
UpdateEffect: payload.UpdateEffectFailAfterInterval,
450-
Reason: "ClusterOperatorDegraded",
451-
Message: "Cluster operator test-co is degraded",
452-
Name: "test-co",
470+
Nested: fmt.Errorf("cluster operator test-co is Degraded=True: RandomReason, random error"),
471+
UpdateEffect: payload.UpdateEffectFailAfterInterval,
472+
Reason: "ClusterOperatorDegraded",
473+
PluralReason: "ClusterOperatorsDegraded",
474+
Message: "Cluster operator test-co is degraded",
475+
PluralMessageFormat: "Cluster operators %s are degraded",
476+
Name: "test-co",
453477
},
454478
}, {
455479
name: "cluster operator reporting available=true no progressing or degraded",
@@ -475,11 +499,13 @@ func Test_checkOperatorHealth(t *testing.T) {
475499
},
476500
},
477501
expErr: &payload.UpdateError{
478-
Nested: fmt.Errorf("cluster operator test-co: available=true, progressing=true, degraded=true, undone="),
479-
UpdateEffect: payload.UpdateEffectFailAfterInterval,
480-
Reason: "ClusterOperatorDegraded",
481-
Message: "Cluster operator test-co is degraded",
482-
Name: "test-co",
502+
Nested: fmt.Errorf("cluster operator test-co: available=true, progressing=true, degraded=true, undone="),
503+
UpdateEffect: payload.UpdateEffectFailAfterInterval,
504+
Reason: "ClusterOperatorDegraded",
505+
PluralReason: "ClusterOperatorsDegraded",
506+
Message: "Cluster operator test-co is degraded",
507+
PluralMessageFormat: "Cluster operators %s are degraded",
508+
Name: "test-co",
483509
},
484510
}, {
485511
name: "cluster operator reporting available=true progressing=false degraded=false",

pkg/cvo/status.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ func (optr *Operator) syncStatus(ctx context.Context, original, config *configv1
353353

354354
// convertErrorToProgressing returns true if the provided status indicates a failure condition can be interpreted as
355355
// still making internal progress. The general error we try to suppress is an operator or operators still being
356-
// unavailable AND the general payload task making progress towards its goal. The error's UpdateEffect determines
356+
// progressing AND the general payload task making progress towards its goal. The error's UpdateEffect determines
357357
// whether an error should be considered a failure and, if so, whether the operator should be given up to 40 minutes
358358
// to recover from the error.
359359
func convertErrorToProgressing(history []configv1.UpdateHistory, now time.Time, status *SyncWorkerStatus) (reason string, message string, ok bool) {
@@ -372,7 +372,11 @@ func convertErrorToProgressing(history []configv1.UpdateHistory, now time.Time,
372372
case payload.UpdateEffectFailAfterInterval:
373373
var exceeded []string
374374
threshold := now.Add(-(40 * time.Minute))
375-
for _, name := range strings.Split(uErr.Name, ", ") {
375+
names := uErr.Names
376+
if len(names) == 0 {
377+
names = []string{uErr.Name}
378+
}
379+
for _, name := range names {
376380
if payload.COUpdateStartTimesGet(name).Before(threshold) {
377381
exceeded = append(exceeded, name)
378382
}

0 commit comments

Comments
 (0)