Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 89 additions & 106 deletions pkg/cvo/internal/operatorstatus.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@ import (
"fmt"
"sort"
"strings"
"time"
"unicode"

kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/rest"

configv1 "github.com/openshift/api/config/v1"
Expand Down Expand Up @@ -123,130 +121,115 @@ func (b *clusterOperatorBuilder) Do(ctx context.Context) error {
return nil
}

return waitForOperatorStatusToBeDone(ctx, 1*time.Second, b.client, os, b.mode)
return checkOperatorHealth(ctx, b.client, os, b.mode)
}

func waitForOperatorStatusToBeDone(ctx context.Context, interval time.Duration, client ClusterOperatorsGetter, expected *configv1.ClusterOperator, mode resourcebuilder.Mode) error {
var lastErr error
err := wait.PollImmediateUntil(interval, func() (bool, error) {
actual, err := client.Get(ctx, expected.Name)
if err != nil {
lastErr = &payload.UpdateError{
Nested: err,
UpdateEffect: payload.UpdateEffectNone,
Reason: "ClusterOperatorNotAvailable",
Message: fmt.Sprintf("Cluster operator %s has not yet reported success", expected.Name),
Name: expected.Name,
}
return false, nil
func checkOperatorHealth(ctx context.Context, client ClusterOperatorsGetter, expected *configv1.ClusterOperator, mode resourcebuilder.Mode) error {
if len(expected.Status.Versions) == 0 {
return &payload.UpdateError{
UpdateEffect: payload.UpdateEffectFail,
Reason: "ClusterOperatorNotAvailable",
Message: fmt.Sprintf("Cluster operator %s does not declare expected versions", expected.Name),
Name: expected.Name,
}
}

if len(expected.Status.Versions) == 0 {
lastErr = &payload.UpdateError{
UpdateEffect: payload.UpdateEffectFail,
Reason: "ClusterOperatorNotAvailable",
Message: fmt.Sprintf("Cluster operator %s does not declare expected versions", expected.Name),
Name: expected.Name,
}
return false, nil
actual, err := client.Get(ctx, expected.Name)
if err != nil {
return &payload.UpdateError{
Nested: err,
UpdateEffect: payload.UpdateEffectNone,
Reason: "ClusterOperatorNotAvailable",
Message: fmt.Sprintf("Cluster operator %s has not yet reported success", expected.Name),
Name: expected.Name,
}
}

// undone is a sorted slice of transition messages for incomplete operands.
undone := make([]string, 0, len(expected.Status.Versions))
for _, expOp := range expected.Status.Versions {
current := ""
for _, actOp := range actual.Status.Versions {
if actOp.Name == expOp.Name {
current = actOp.Version
break
}
}
if current != expOp.Version {
if current == "" {
undone = append(undone, fmt.Sprintf("%s to %s", expOp.Name, expOp.Version))
} else {
undone = append(undone, fmt.Sprintf("%s from %s to %s", expOp.Name, current, expOp.Version))
}
// undone is a sorted slice of transition messages for incomplete operands.
undone := make([]string, 0, len(expected.Status.Versions))
for _, expOp := range expected.Status.Versions {
current := ""
for _, actOp := range actual.Status.Versions {
if actOp.Name == expOp.Name {
current = actOp.Version
break
}
}
sort.Strings(undone)

available := false
var availableCondition *configv1.ClusterOperatorStatusCondition
progressing := true
degraded := true
var degradedCondition *configv1.ClusterOperatorStatusCondition
for i := range actual.Status.Conditions {
condition := &actual.Status.Conditions[i]
switch {
case condition.Type == configv1.OperatorAvailable:
if condition.Status == configv1.ConditionTrue {
available = true
}
availableCondition = condition
case condition.Type == configv1.OperatorProgressing && condition.Status == configv1.ConditionFalse:
progressing = false
case condition.Type == configv1.OperatorDegraded:
if condition.Status == configv1.ConditionFalse {
degraded = false
}
degradedCondition = condition
if current != expOp.Version {
if current == "" {
undone = append(undone, fmt.Sprintf("%s to %s", expOp.Name, expOp.Version))
} else {
undone = append(undone, fmt.Sprintf("%s from %s to %s", expOp.Name, current, expOp.Version))
}
}

nestedMessage := fmt.Errorf("cluster operator %s: available=%v, progressing=%v, degraded=%v, undone=%s",
actual.Name, available, progressing, degraded, strings.Join(undone, ", "))

if !available {
if availableCondition != nil && len(availableCondition.Message) > 0 {
nestedMessage = fmt.Errorf("cluster operator %s is %s=%s: %s: %s", actual.Name, availableCondition.Type, availableCondition.Status, availableCondition.Reason, availableCondition.Message)
}
sort.Strings(undone)

available := false
var availableCondition *configv1.ClusterOperatorStatusCondition
progressing := true
degraded := true
var degradedCondition *configv1.ClusterOperatorStatusCondition
for i := range actual.Status.Conditions {
condition := &actual.Status.Conditions[i]
switch {
case condition.Type == configv1.OperatorAvailable:
if condition.Status == configv1.ConditionTrue {
available = true
}
lastErr = &payload.UpdateError{
Nested: nestedMessage,
UpdateEffect: payload.UpdateEffectFail,
Reason: "ClusterOperatorNotAvailable",
Message: fmt.Sprintf("Cluster operator %s is not available", actual.Name),
Name: actual.Name,
availableCondition = condition
case condition.Type == configv1.OperatorProgressing && condition.Status == configv1.ConditionFalse:
progressing = false
case condition.Type == configv1.OperatorDegraded:
if condition.Status == configv1.ConditionFalse {
degraded = false
}
return false, nil
degradedCondition = condition
}
}

// during initialization we allow degraded
if degraded && mode != resourcebuilder.InitializingMode {
if degradedCondition != nil && len(degradedCondition.Message) > 0 {
nestedMessage = fmt.Errorf("cluster operator %s is %s=%s: %s, %s", actual.Name, degradedCondition.Type, degradedCondition.Status, degradedCondition.Reason, degradedCondition.Message)
}
lastErr = &payload.UpdateError{
Nested: nestedMessage,
UpdateEffect: payload.UpdateEffectFailAfterInterval,
Reason: "ClusterOperatorDegraded",
Message: fmt.Sprintf("Cluster operator %s is degraded", actual.Name),
Name: actual.Name,
}
return false, nil
nestedMessage := fmt.Errorf("cluster operator %s: available=%v, progressing=%v, degraded=%v, undone=%s",
actual.Name, available, progressing, degraded, strings.Join(undone, ", "))

if !available {
if availableCondition != nil && len(availableCondition.Message) > 0 {
nestedMessage = fmt.Errorf("cluster operator %s is %s=%s: %s: %s", actual.Name, availableCondition.Type, availableCondition.Status, availableCondition.Reason, availableCondition.Message)
}
return &payload.UpdateError{
Nested: nestedMessage,
UpdateEffect: payload.UpdateEffectFail,
Reason: "ClusterOperatorNotAvailable",
Message: fmt.Sprintf("Cluster operator %s is not available", actual.Name),
Name: actual.Name,
}
}

// during initialization we allow undone versions
if len(undone) > 0 && mode != resourcebuilder.InitializingMode {
nestedMessage = fmt.Errorf("cluster operator %s is available and not degraded but has not finished updating to target version", actual.Name)
lastErr = &payload.UpdateError{
Nested: nestedMessage,
UpdateEffect: payload.UpdateEffectNone,
Reason: "ClusterOperatorUpdating",
Message: fmt.Sprintf("Cluster operator %s is updating versions", actual.Name),
Name: actual.Name,
}
return false, nil
// during initialization we allow degraded
if degraded && mode != resourcebuilder.InitializingMode {
if degradedCondition != nil && len(degradedCondition.Message) > 0 {
nestedMessage = fmt.Errorf("cluster operator %s is %s=%s: %s, %s", actual.Name, degradedCondition.Type, degradedCondition.Status, degradedCondition.Reason, degradedCondition.Message)
}
return &payload.UpdateError{
Nested: nestedMessage,
UpdateEffect: payload.UpdateEffectFailAfterInterval,
Reason: "ClusterOperatorDegraded",
Message: fmt.Sprintf("Cluster operator %s is degraded", actual.Name),
Name: actual.Name,
}
}

return true, nil
}, ctx.Done())
if err != nil {
if err == wait.ErrWaitTimeout && lastErr != nil {
return lastErr
// during initialization we allow undone versions
if len(undone) > 0 && mode != resourcebuilder.InitializingMode {
nestedMessage = fmt.Errorf("cluster operator %s is available and not degraded but has not finished updating to target version", actual.Name)
return &payload.UpdateError{
Nested: nestedMessage,
UpdateEffect: payload.UpdateEffectNone,
Reason: "ClusterOperatorUpdating",
Message: fmt.Sprintf("Cluster operator %s is updating versions", actual.Name),
Name: actual.Name,
}
return err
}

return nil
}

Expand Down
8 changes: 3 additions & 5 deletions pkg/cvo/internal/operatorstatus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"fmt"
"reflect"
"testing"
"time"

"github.com/davecgh/go-spew/spew"
apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -20,7 +19,8 @@ import (
"github.com/openshift/cluster-version-operator/pkg/payload"
)

func Test_waitForOperatorStatusToBeDone(t *testing.T) {
func Test_checkOperatorHealth(t *testing.T) {
ctx := context.Background()
tests := []struct {
name string
actual *configv1.ClusterOperator
Expand Down Expand Up @@ -528,9 +528,7 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
return false, nil, fmt.Errorf("unexpected client action: %#v", action)
})

ctxWithTimeout, cancel := context.WithTimeout(context.TODO(), 1*time.Millisecond)
defer cancel()
err := waitForOperatorStatusToBeDone(ctxWithTimeout, 1*time.Millisecond, clientClusterOperatorsGetter{getter: client.ConfigV1().ClusterOperators()}, test.exp, test.mode)
err := checkOperatorHealth(ctx, clientClusterOperatorsGetter{getter: client.ConfigV1().ClusterOperators()}, test.exp, test.mode)
if (test.expErr == nil) != (err == nil) {
t.Fatalf("unexpected error: %v", err)
}
Expand Down