Skip to content

Commit fbf45e8

Browse files
committed
Merge pull request kubernetes#4946 from bprashanth/kubectl_retry
Retry resizing replication controllers in kubectl
2 parents 163411a + 1970c2d commit fbf45e8

File tree

7 files changed

+113
-12
lines changed

7 files changed

+113
-12
lines changed

cmd/kube-controller-manager/app/controllermanager.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ func (s *CMServer) Run(_ []string) error {
144144
go util.Forever(func() { endpoints.SyncServiceEndpoints() }, time.Second*10)
145145

146146
controllerManager := replicationControllerPkg.NewReplicationManager(kubeClient)
147-
controllerManager.Run(10 * time.Second)
147+
controllerManager.Run(replicationControllerPkg.DefaultSyncPeriod)
148148

149149
kubeletClient, err := client.NewKubeletClient(&s.KubeletConfig)
150150
if err != nil {

cmd/kubernetes/kubernetes.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ func runControllerManager(machineList []string, cl *client.Client, nodeMilliCPU,
133133
go util.Forever(func() { endpoints.SyncServiceEndpoints() }, time.Second*10)
134134

135135
controllerManager := controller.NewReplicationManager(cl)
136-
controllerManager.Run(10 * time.Second)
136+
controllerManager.Run(controller.DefaultSyncPeriod)
137137
}
138138

139139
func startComponents(etcdClient tools.EtcdClient, cl *client.Client, addr net.IP, port int) {

pkg/controller/replication_controller.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ type RealPodControl struct {
5656
kubeClient client.Interface
5757
}
5858

59+
// Time period of main replication controller sync loop
60+
const DefaultSyncPeriod = 10 * time.Second
61+
5962
func (r RealPodControl) createReplica(namespace string, controller api.ReplicationController) {
6063
desiredLabels := make(labels.Set)
6164
for k, v := range controller.Spec.Template.Labels {

pkg/kubectl/cmd/resize.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,12 @@ package cmd
1919
import (
2020
"fmt"
2121
"io"
22+
"time"
2223

24+
"github.com/GoogleCloudPlatform/kubernetes/pkg/controller"
2325
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl"
2426
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl/cmd/util"
27+
"github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait"
2528
"github.com/spf13/cobra"
2629
)
2730

@@ -37,6 +40,9 @@ $ kubectl resize --replicas=3 replicationcontrollers foo
3740
3841
// If the replication controller named foo's current size is 2, resize foo to 3.
3942
$ kubectl resize --current-replicas=2 --replicas=3 replicationcontrollers foo`
43+
44+
retryFrequency = controller.DefaultSyncPeriod / 100
45+
retryTimeout = 10 * time.Second
4046
)
4147

4248
func (f *Factory) NewCmdResize(out io.Writer) *cobra.Command {
@@ -63,9 +69,15 @@ func (f *Factory) NewCmdResize(out io.Writer) *cobra.Command {
6369

6470
resourceVersion := util.GetFlagString(cmd, "resource-version")
6571
currentSize := util.GetFlagInt(cmd, "current-replicas")
66-
s, err := resizer.Resize(namespace, name, &kubectl.ResizePrecondition{currentSize, resourceVersion}, uint(count))
67-
checkErr(err)
68-
fmt.Fprintf(out, "%s\n", s)
72+
precondition := &kubectl.ResizePrecondition{currentSize, resourceVersion}
73+
cond := kubectl.ResizeCondition(resizer, precondition, namespace, name, uint(count))
74+
75+
msg := "resized"
76+
if err = wait.Poll(retryFrequency, retryTimeout, cond); err != nil {
77+
msg = fmt.Sprintf("Failed to resize controller in spite of retrying for %s", retryTimeout)
78+
checkErr(err)
79+
}
80+
fmt.Fprintf(out, "%s\n", msg)
6981
},
7082
}
7183
cmd.Flags().String("resource-version", "", "Precondition for resource version. Requires that the current resource version match this value in order to resize.")

pkg/kubectl/resize.go

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
2424
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
25+
"github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait"
2526
)
2627

2728
// ResizePrecondition describes a condition that must be true for the resize to take place
@@ -33,23 +34,46 @@ type ResizePrecondition struct {
3334
ResourceVersion string
3435
}
3536

37+
// A PreconditionError is returned when a replication controller fails to match
38+
// the resize preconditions passed to kubectl.
3639
type PreconditionError struct {
3740
Precondition string
3841
ExpectedValue string
3942
ActualValue string
4043
}
4144

42-
func (pe *PreconditionError) Error() string {
45+
func (pe PreconditionError) Error() string {
4346
return fmt.Sprintf("Expected %s to be %s, was %s", pe.Precondition, pe.ExpectedValue, pe.ActualValue)
4447
}
4548

49+
type ControllerResizeErrorType int
50+
51+
const (
52+
ControllerResizeGetFailure ControllerResizeErrorType = iota
53+
ControllerResizeUpdateFailure
54+
)
55+
56+
// A ControllerResizeError is returned when a the resize request passes
57+
// preconditions but fails to actually resize the controller.
58+
type ControllerResizeError struct {
59+
FailureType ControllerResizeErrorType
60+
ResourceVersion string
61+
ActualError error
62+
}
63+
64+
func (c ControllerResizeError) Error() string {
65+
return fmt.Sprintf(
66+
"Resizing the controller failed with: %s; Current resource version %s",
67+
c.ActualError, c.ResourceVersion)
68+
}
69+
4670
// Validate ensures that the preconditions match. Returns nil if they are valid, an error otherwise
4771
func (precondition *ResizePrecondition) Validate(controller *api.ReplicationController) error {
4872
if precondition.Size != -1 && controller.Spec.Replicas != precondition.Size {
49-
return &PreconditionError{"replicas", strconv.Itoa(precondition.Size), strconv.Itoa(controller.Spec.Replicas)}
73+
return PreconditionError{"replicas", strconv.Itoa(precondition.Size), strconv.Itoa(controller.Spec.Replicas)}
5074
}
5175
if precondition.ResourceVersion != "" && controller.ResourceVersion != precondition.ResourceVersion {
52-
return &PreconditionError{"resource version", precondition.ResourceVersion, controller.ResourceVersion}
76+
return PreconditionError{"resource version", precondition.ResourceVersion, controller.ResourceVersion}
5377
}
5478
return nil
5579
}
@@ -70,11 +94,27 @@ type ReplicationControllerResizer struct {
7094
client.Interface
7195
}
7296

97+
// ResizeCondition is a closure around Resize that facilitates retries via util.wait
98+
func ResizeCondition(r Resizer, precondition *ResizePrecondition, namespace, name string, count uint) wait.ConditionFunc {
99+
return func() (bool, error) {
100+
_, err := r.Resize(namespace, name, precondition, count)
101+
switch e, _ := err.(ControllerResizeError); err.(type) {
102+
case nil:
103+
return true, nil
104+
case ControllerResizeError:
105+
if e.FailureType == ControllerResizeUpdateFailure {
106+
return false, nil
107+
}
108+
}
109+
return false, err
110+
}
111+
}
112+
73113
func (resize *ReplicationControllerResizer) Resize(namespace, name string, preconditions *ResizePrecondition, newSize uint) (string, error) {
74114
rc := resize.ReplicationControllers(namespace)
75115
controller, err := rc.Get(name)
76116
if err != nil {
77-
return "", err
117+
return "", ControllerResizeError{ControllerResizeGetFailure, "Unknown", err}
78118
}
79119

80120
if preconditions != nil {
@@ -86,7 +126,7 @@ func (resize *ReplicationControllerResizer) Resize(namespace, name string, preco
86126
controller.Spec.Replicas = int(newSize)
87127
// TODO: do retry on 409 errors here?
88128
if _, err := rc.Update(controller); err != nil {
89-
return "", err
129+
return "", ControllerResizeError{ControllerResizeUpdateFailure, controller.ResourceVersion, err}
90130
}
91131
// TODO: do a better job of printing objects here.
92132
return "resized", nil

pkg/kubectl/resize_test.go

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,53 @@ limitations under the License.
1717
package kubectl
1818

1919
import (
20-
// "strings"
20+
"errors"
2121
"testing"
2222

2323
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
2424
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
25-
// "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
2625
)
2726

27+
type ErrorReplicationControllers struct {
28+
client.FakeReplicationControllers
29+
}
30+
31+
func (c *ErrorReplicationControllers) Update(controller *api.ReplicationController) (*api.ReplicationController, error) {
32+
return nil, errors.New("Replication controller update failure")
33+
}
34+
35+
type ErrorReplicationControllerClient struct {
36+
client.Fake
37+
}
38+
39+
func (c *ErrorReplicationControllerClient) ReplicationControllers(namespace string) client.ReplicationControllerInterface {
40+
return &ErrorReplicationControllers{client.FakeReplicationControllers{&c.Fake, namespace}}
41+
}
42+
43+
func TestReplicationControllerResizeRetry(t *testing.T) {
44+
fake := &ErrorReplicationControllerClient{Fake: client.Fake{}}
45+
resizer := ReplicationControllerResizer{fake}
46+
preconditions := ResizePrecondition{-1, ""}
47+
count := uint(3)
48+
name := "foo"
49+
namespace := "default"
50+
51+
resizeFunc := ResizeCondition(&resizer, &preconditions, namespace, name, count)
52+
pass, err := resizeFunc()
53+
if pass != false {
54+
t.Errorf("Expected an update failure to return pass = false, got pass = %v", pass)
55+
}
56+
if err != nil {
57+
t.Errorf("Did not expect an error on update failure, got %v", err)
58+
}
59+
preconditions = ResizePrecondition{3, ""}
60+
resizeFunc = ResizeCondition(&resizer, &preconditions, namespace, name, count)
61+
pass, err = resizeFunc()
62+
if err == nil {
63+
t.Errorf("Expected error on precondition failure")
64+
}
65+
}
66+
2867
func TestReplicationControllerResize(t *testing.T) {
2968
fake := &client.Fake{}
3069
resizer := ReplicationControllerResizer{fake}

pkg/util/wait/wait_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,13 @@ func TestPoll(t *testing.T) {
6868
if invocations == 0 {
6969
t.Errorf("Expected at least one invocation, got zero")
7070
}
71+
expectedError := errors.New("Expected error")
72+
f = ConditionFunc(func() (bool, error) {
73+
return false, expectedError
74+
})
75+
if err := Poll(time.Microsecond, time.Microsecond, f); err == nil || err != expectedError {
76+
t.Fatalf("Expected error %v, got none %v", expectedError, err)
77+
}
7178
}
7279

7380
func TestPollForever(t *testing.T) {

0 commit comments

Comments
 (0)