17 changes: 8 additions & 9 deletions pkg/cvo/cvo.go
@@ -125,10 +125,9 @@ type Operator struct {
upgradeable *upgradeable
upgradeableChecks []upgradeableCheck

// minimumUpgradeableCheckInterval is the minimum duration before another
// synchronization of the upgradeable status can happen during failing
// precondition checks.
minimumUpgradeableCheckInterval time.Duration
// upgradeableCheckIntervals drives the minimum intervals between Upgradeable status
// synchronizations
upgradeableCheckIntervals upgradeableCheckIntervals

// verifier, if provided, will be used to check an update before it is executed.
// Any error will prevent an update payload from being accessed.
@@ -188,11 +187,11 @@ func New(
Image: releaseImage,
},

statusInterval: 15 * time.Second,
minimumUpdateCheckInterval: minimumInterval,
minimumUpgradeableCheckInterval: 15 * time.Second,
payloadDir: overridePayloadDir,
defaultUpstreamServer: "https://api.openshift.com/api/upgrades_info/v1/graph",
statusInterval: 15 * time.Second,
minimumUpdateCheckInterval: minimumInterval,
upgradeableCheckIntervals: defaultUpgradeableCheckIntervals(),
payloadDir: overridePayloadDir,
defaultUpstreamServer: "https://api.openshift.com/api/upgrades_info/v1/graph",

client: client,
kubeClient: kubeClient,
75 changes: 50 additions & 25 deletions pkg/cvo/upgradeable.go
@@ -33,15 +33,41 @@ const (

var adminAckGateRegexp = regexp.MustCompile(adminAckGateFmt)

// syncUpgradeable synchronizes the upgradeable status only if it has been more than
// the minimumUpdateCheckInterval since the last synchronization or the precondition
// checks on the payload are failing for less than minimumUpdateCheckInterval, and it has
// been more than the minimumUpgradeableCheckInterval since the last synchronization.
func (optr *Operator) syncUpgradeable(config *configv1.ClusterVersion) error {
u := optr.getUpgradeable()
if u != nil && u.RecentlyChanged(optr.minimumUpdateCheckInterval) && !shouldSyncUpgradeableDueToPreconditionChecks(optr, config, u) {
klog.V(2).Infof("Upgradeable conditions were recently checked, will try later.")
return nil
// upgradeableCheckIntervals holds the time intervals that drive how often CVO checks for upgradeability
type upgradeableCheckIntervals struct {
// min is the base minimum interval between upgradeability checks, applied under normal circumstances
min time.Duration

// minOnFailedPreconditions is the minimum interval between upgradeability checks when precondition checks are
// failing and started failing recently (see afterPreconditionsFailed). It should be lower than min because we
// want CVO to check upgradeability more often in that window.
minOnFailedPreconditions time.Duration

// afterPreconditionsFailed is the period of time after preconditions started failing during which
// minOnFailedPreconditions is applied instead of min.
afterPreconditionsFailed time.Duration
}

func defaultUpgradeableCheckIntervals() upgradeableCheckIntervals {
return upgradeableCheckIntervals{
// Two minutes is the lower bound of the interval that was previously chosen nondeterministically.
// TODO (OTA-860): Investigate our options for reducing this interval. We will need to investigate
// the API usage patterns of the underlying checks; there is anecdotal evidence that they hit the
// apiserver instead of using the local informer cache.
min: 2 * time.Minute,
minOnFailedPreconditions: 15 * time.Second,
afterPreconditionsFailed: 2 * time.Minute,
}
}

// syncUpgradeable synchronizes the upgradeable status only if sufficient time has passed since its last update. This
// throttling period is dynamic and is driven by upgradeableCheckIntervals.
func (optr *Operator) syncUpgradeable(cv *configv1.ClusterVersion) error {
if u := optr.getUpgradeable(); u != nil {
if u.RecentlyChanged(optr.upgradeableCheckIntervals.throttlePeriod(cv)) {
klog.V(2).Infof("Upgradeable conditions were recently checked, will try later.")
return nil
}
}
optr.setUpgradeableConditions()

@@ -466,21 +492,20 @@ func (optr *Operator) adminGatesEventHandler() cache.ResourceEventHandler {
}
}

// shouldSyncUpgradeableDueToPreconditionChecks checks if the upgradeable status should
// be synchronized due to the precondition checks. It checks whether the precondition
// checks on the payload are failing for less than minimumUpdateCheckInterval, and it has
// been more than the minimumUpgradeableCheckInterval since the last synchronization.
// This means, upon precondition failure, we will synchronize the upgradeable status
// more frequently at beginning of an upgrade.
//
// shouldSyncUpgradeableDueToPreconditionChecks expects the parameters not to be nil.
// throttlePeriod returns the duration for which the upgradeable status should be considered recent
// enough and not in need of an update. The baseline duration is min. When the precondition checks
// on the payload have been failing for less than afterPreconditionsFailed, we want to synchronize
// the upgradeable status more frequently at the beginning of an upgrade, so we return
// minOnFailedPreconditions, which is expected to be lower than min.
//
// Function returns true if the synchronization should happen, returns false otherwise.
func shouldSyncUpgradeableDueToPreconditionChecks(optr *Operator, config *configv1.ClusterVersion, u *upgradeable) bool {
cond := resourcemerge.FindOperatorStatusCondition(config.Status.Conditions, DesiredReleaseAccepted)
if cond != nil && cond.Reason == "PreconditionChecks" && cond.Status == configv1.ConditionFalse &&
hasPassedDurationSinceTime(u.At, optr.minimumUpgradeableCheckInterval) && !hasPassedDurationSinceTime(cond.LastTransitionTime.Time, optr.minimumUpdateCheckInterval) {
return true
}
return false
// The cv parameter is expected to be non-nil.
func (intervals *upgradeableCheckIntervals) throttlePeriod(cv *configv1.ClusterVersion) time.Duration {
if cond := resourcemerge.FindOperatorStatusCondition(cv.Status.Conditions, DesiredReleaseAccepted); cond != nil {
if cond.Reason == "PreconditionChecks" && cond.Status == configv1.ConditionFalse &&
!hasPassedDurationSinceTime(cond.LastTransitionTime.Time, intervals.afterPreconditionsFailed) {
return intervals.minOnFailedPreconditions
}
}
return intervals.min
}
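
The new type and its throttlePeriod method give syncUpgradeable a dynamic cadence: the normal 2-minute throttle, and a faster 15-second throttle during the first 2 minutes after precondition checks start failing. Purely as an illustration (a sketch, not part of this diff), the snippet below is written as if it lived in package cvo next to the code above; throttlePeriodSketch is a hypothetical helper name, and the printed values assume defaultUpgradeableCheckIntervals().

package cvo

import (
	"fmt"
	"time"

	configv1 "github.com/openshift/api/config/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// throttlePeriodSketch (hypothetical) walks through the two cadences: the normal one
// and the faster one used shortly after precondition checks start failing.
func throttlePeriodSketch() {
	intervals := defaultUpgradeableCheckIntervals()

	// No DesiredReleaseAccepted=False/PreconditionChecks condition: the Upgradeable
	// status is refreshed at most once per intervals.min.
	cv := &configv1.ClusterVersion{}
	fmt.Println(intervals.throttlePeriod(cv)) // 2m0s

	// Preconditions started failing 30 seconds ago: still inside the
	// afterPreconditionsFailed window, so the faster cadence applies.
	cv.Status.Conditions = []configv1.ClusterOperatorStatusCondition{{
		Type:               DesiredReleaseAccepted,
		Reason:             "PreconditionChecks",
		Status:             configv1.ConditionFalse,
		LastTransitionTime: metav1.NewTime(time.Now().Add(-30 * time.Second)),
	}}
	fmt.Println(intervals.throttlePeriod(cv)) // 15s
}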
59 changes: 59 additions & 0 deletions pkg/cvo/upgradeable_test.go
@@ -0,0 +1,59 @@
package cvo

import (
"testing"
"time"

configv1 "github.com/openshift/api/config/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestUpgradeableCheckIntervalsThrottlePeriod(t *testing.T) {
intervals := defaultUpgradeableCheckIntervals()
testCases := []struct {
name string
condition *configv1.ClusterOperatorStatusCondition
expected time.Duration
}{
{
name: "no condition",
expected: intervals.min,
},
{
name: "passing preconditions",
condition: &configv1.ClusterOperatorStatusCondition{Type: DesiredReleaseAccepted, Reason: "PreconditionChecks", Status: configv1.ConditionTrue, LastTransitionTime: metav1.Now()},
expected: intervals.min,
},
{
name: "failing but not precondition",
condition: &configv1.ClusterOperatorStatusCondition{Type: DesiredReleaseAccepted, Reason: "NotPreconditionChecks", Status: configv1.ConditionFalse, LastTransitionTime: metav1.Now()},
expected: intervals.min,
},
{
name: "failing preconditions but too long ago",
condition: &configv1.ClusterOperatorStatusCondition{
Type: DesiredReleaseAccepted,
Reason: "PreconditionChecks",
Status: configv1.ConditionFalse,
LastTransitionTime: metav1.NewTime(time.Now().Add(-(intervals.afterPreconditionsFailed + time.Hour))),
},
expected: intervals.min,
},
{
name: "failing preconditions recently",
condition: &configv1.ClusterOperatorStatusCondition{Type: DesiredReleaseAccepted, Reason: "PreconditionChecks", Status: configv1.ConditionFalse, LastTransitionTime: metav1.Now()},
expected: intervals.minOnFailedPreconditions,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
cv := &configv1.ClusterVersion{Status: configv1.ClusterVersionStatus{Conditions: []configv1.ClusterOperatorStatusCondition{}}}
if tc.condition != nil {
cv.Status.Conditions = append(cv.Status.Conditions, *tc.condition)
}
if actual := intervals.throttlePeriod(cv); actual != tc.expected {
t.Errorf("throttlePeriod() = %v, want %v", actual, tc.expected)
}
})
}
}