Skip to content

Commit a53efd5

Browse files
Support --options on upgrade tests to abort in progress
To better stress test upgrades, add disruption elements for aborting an upgrade part of the way through as well as rebooting random masters. --options=abort-at=PERCENT will cause the upgrade to stop and roll back to the previous version when PERCENT of operators have been upgraded. 100 will be after the upgrade is complete, while 'random' will be at a randomly chosen percent. --options=disrupt-reboot=POLICY causes random periodic reboots of masters during upgrades. If set to 'graceful' the reboot allows clean shutdown. If set to 'force' the machines exit immediately (to simulate power loss).
1 parent 9cf4d51 commit a53efd5

File tree

7 files changed

+553
-176
lines changed

7 files changed

+553
-176
lines changed

cmd/openshift-tests/openshift-tests.go

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ func newRunCommand() *cobra.Command {
116116
if err := initProvider(opt.Provider); err != nil {
117117
return err
118118
}
119-
os.Setenv("TEST_PROVIDER", opt.Provider)
120119
e2e.AfterReadingAllFlags(exutil.TestContext)
121120
return opt.Run(args)
122121
})
@@ -142,9 +141,18 @@ func newRunUpgradeCommand() *cobra.Command {
142141
If you specify the --dry-run argument, the actions the suite will take will be printed to the
143142
output.
144143
144+
Supported options:
145+
146+
* abort-at=NUMBER - Set to a number between 0 and 100 to control the percent of operators
147+
at which to stop the current upgrade and roll back to the current version.
148+
* disrupt-reboot=POLICY - During upgrades, periodically reboot master nodes. If set to 'graceful'
149+
the reboot will allow the node to shut down services in an orderly fashion. If set to 'force' the
150+
machine will terminate immediately without clean shutdown.
151+
145152
`) + testginkgo.SuitesString(opt.Suites, "\n\nAvailable upgrade suites:\n\n"),
146153

147-
SilenceUsage: true,
154+
SilenceUsage: true,
155+
SilenceErrors: true,
148156
RunE: func(cmd *cobra.Command, args []string) error {
149157
return mirrorToFile(opt, func() error {
150158
if len(upgradeOpt.ToImage) == 0 {
@@ -156,7 +164,11 @@ func newRunUpgradeCommand() *cobra.Command {
156164
if suite.Name == args[0] {
157165
upgradeOpt.Suite = suite.Name
158166
upgradeOpt.JUnitDir = opt.JUnitDir
159-
os.Setenv("TEST_UPGRADE", upgradeOpt.ToEnv())
167+
value := upgradeOpt.ToEnv()
168+
if err := initUpgrade(value); err != nil {
169+
return err
170+
}
171+
opt.SuiteOptions = value
160172
break
161173
}
162174
}
@@ -165,7 +177,6 @@ func newRunUpgradeCommand() *cobra.Command {
165177
if err := initProvider(opt.Provider); err != nil {
166178
return err
167179
}
168-
os.Setenv("TEST_PROVIDER", opt.Provider)
169180
e2e.AfterReadingAllFlags(exutil.TestContext)
170181
return opt.Run(args)
171182
})
@@ -198,7 +209,7 @@ func newRunTestCommand() *cobra.Command {
198209
if err := initProvider(os.Getenv("TEST_PROVIDER")); err != nil {
199210
return err
200211
}
201-
if err := initUpgrade(os.Getenv("TEST_UPGRADE")); err != nil {
212+
if err := initUpgrade(os.Getenv("TEST_SUITE_OPTIONS")); err != nil {
202213
return err
203214
}
204215
e2e.AfterReadingAllFlags(exutil.TestContext)
@@ -236,6 +247,7 @@ func mirrorToFile(opt *testginkgo.Options, fn func() error) error {
236247

237248
func bindOptions(opt *testginkgo.Options, flags *pflag.FlagSet) {
238249
flags.BoolVar(&opt.DryRun, "dry-run", opt.DryRun, "Print the tests to run without executing them.")
250+
flags.BoolVar(&opt.PrintCommands, "print-commands", opt.PrintCommands, "Print the sub-commands that would be executed instead.")
239251
flags.StringVar(&opt.JUnitDir, "junit-dir", opt.JUnitDir, "The directory to write test reports to.")
240252
flags.StringVar(&opt.Provider, "provider", opt.Provider, "The cluster infrastructure provider. Will automatically default to the correct value.")
241253
flags.StringVarP(&opt.TestFile, "file", "f", opt.TestFile, "Create a suite from the newline-delimited test names in this file.")

cmd/openshift-tests/upgrade.go

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,23 @@ var upgradeSuites = []*ginkgo.TestSuite{
2525
`),
2626
Matches: func(name string) bool { return strings.Contains(name, "[Feature:ClusterUpgrade]") },
2727

28-
Init: func() error { return filterUpgrade(upgrade.AllTests(), func(name string) bool { return true }) },
28+
Init: func(opt map[string]string) error {
29+
for k, v := range opt {
30+
switch k {
31+
case "abort-at":
32+
if err := upgrade.SetUpgradeAbortAt(v); err != nil {
33+
return err
34+
}
35+
case "disrupt-reboot":
36+
if err := upgrade.SetUpgradeDisruptReboot(v); err != nil {
37+
return err
38+
}
39+
default:
40+
return fmt.Errorf("unrecognized upgrade option: %s", k)
41+
}
42+
}
43+
return filterUpgrade(upgrade.AllTests(), func(name string) bool { return true })
44+
},
2945
TestTimeout: 120 * time.Minute,
3046
},
3147
}
@@ -34,6 +50,27 @@ type UpgradeOptions struct {
3450
Suite string
3551
ToImage string
3652
JUnitDir string
53+
54+
TestOptions []string
55+
}
56+
57+
func (o *UpgradeOptions) OptionsMap() (map[string]string, error) {
58+
options := make(map[string]string)
59+
for _, option := range o.TestOptions {
60+
parts := strings.SplitN(option, "=", 2)
61+
if len(parts) != 2 {
62+
return nil, fmt.Errorf("test option %q is not valid, must be KEY=VALUE", option)
63+
}
64+
if len(parts[0]) == 0 {
65+
return nil, fmt.Errorf("test option %q is not valid, must be KEY=VALUE", option)
66+
}
67+
_, exists := options[parts[0]]
68+
if exists {
69+
return nil, fmt.Errorf("option %q declared twice", parts[0])
70+
}
71+
options[parts[0]] = parts[1]
72+
}
73+
return options, nil
3774
}
3875

3976
func (o *UpgradeOptions) ToEnv() string {
@@ -57,8 +94,12 @@ func initUpgrade(value string) error {
5794
exutil.TestContext.UpgradeTarget = ""
5895
exutil.TestContext.UpgradeImage = opt.ToImage
5996
exutil.TestContext.ReportDir = opt.JUnitDir
97+
o, err := opt.OptionsMap()
98+
if err != nil {
99+
return err
100+
}
60101
if suite.Init != nil {
61-
return suite.Init()
102+
return suite.Init(o)
62103
}
63104
return nil
64105
}
@@ -79,4 +120,5 @@ func filterUpgrade(tests []upgrades.Test, match func(string) bool) error {
79120

80121
// bindUpgradeOptions registers the upgrade-specific command line flags on
// flags, storing parsed values into opt: --to-image into opt.ToImage and
// --options into opt.TestOptions. The current field values are used as the
// flag defaults.
func bindUpgradeOptions(opt *UpgradeOptions, flags *pflag.FlagSet) {
	flags.StringVar(
		&opt.ToImage, "to-image", opt.ToImage,
		"Specify the image to test an upgrade to.",
	)
	flags.StringSliceVar(
		&opt.TestOptions, "options", opt.TestOptions,
		"A set of KEY=VALUE options to control the test. See the help text.",
	)
}

pkg/test/ginkgo/cmd_runsuite.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,21 @@ type Options struct {
3030

3131
IncludeSuccessOutput bool
3232

33-
Provider string
33+
Provider string
34+
SuiteOptions string
3435

3536
Suites []*TestSuite
3637

37-
DryRun bool
38-
Out, ErrOut io.Writer
38+
DryRun bool
39+
PrintCommands bool
40+
Out, ErrOut io.Writer
41+
}
42+
43+
func (opt *Options) AsEnv() []string {
44+
var args []string
45+
args = append(args, fmt.Sprintf("TEST_PROVIDER=%s", opt.Provider))
46+
args = append(args, fmt.Sprintf("TEST_SUITE_OPTIONS=%s", opt.SuiteOptions))
47+
return args
3948
}
4049

4150
func (opt *Options) Run(args []string) error {
@@ -104,6 +113,11 @@ func (opt *Options) Run(args []string) error {
104113
return fmt.Errorf("suite %q does not contain any tests", suite.Name)
105114
}
106115

116+
if opt.PrintCommands {
117+
status := newTestStatus(opt.Out, true, len(tests), time.Minute, &monitor.Monitor{}, opt.AsEnv())
118+
newParallelTestQueue(tests).Execute(context.Background(), 1, status.OutputCommand)
119+
return nil
120+
}
107121
if opt.DryRun {
108122
for _, test := range sortedTests(tests) {
109123
fmt.Fprintf(opt.Out, "%q\n", test.name)
@@ -164,7 +178,7 @@ func (opt *Options) Run(args []string) error {
164178
if len(tests) == 1 {
165179
includeSuccess = true
166180
}
167-
status := newTestStatus(opt.Out, includeSuccess, len(tests), timeout, m)
181+
status := newTestStatus(opt.Out, includeSuccess, len(tests), timeout, m, opt.AsEnv())
168182

169183
smoke, normal := splitTests(tests, func(t *testCase) bool {
170184
return strings.Contains(t.name, "[Smoke]")
@@ -255,7 +269,7 @@ func (opt *Options) Run(args []string) error {
255269
}
256270

257271
q := newParallelTestQueue(retries)
258-
status := newTestStatus(ioutil.Discard, opt.IncludeSuccessOutput, len(retries), timeout, m)
272+
status := newTestStatus(ioutil.Discard, opt.IncludeSuccessOutput, len(retries), timeout, m, opt.AsEnv())
259273
q.Execute(ctx, parallelism, status.Run)
260274
var flaky []string
261275
var repeatFailures []*testCase

pkg/test/ginkgo/status.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
package ginkgo
22

33
import (
4+
"bytes"
45
"context"
56
"fmt"
67
"io"
78
"os"
89
"os/exec"
910
"sort"
11+
"strings"
1012
"sync"
1113
"syscall"
1214
"time"
@@ -18,6 +20,7 @@ type testStatus struct {
1820
out io.Writer
1921
timeout time.Duration
2022
monitor monitor.Interface
23+
env []string
2124

2225
includeSuccessfulOutput bool
2326

@@ -27,12 +30,13 @@ type testStatus struct {
2730
total int
2831
}
2932

30-
func newTestStatus(out io.Writer, includeSuccessfulOutput bool, total int, timeout time.Duration, m monitor.Interface) *testStatus {
33+
func newTestStatus(out io.Writer, includeSuccessfulOutput bool, total int, timeout time.Duration, m monitor.Interface, testEnv []string) *testStatus {
3134
return &testStatus{
3235
out: out,
3336
total: total,
3437
timeout: timeout,
3538
monitor: m,
39+
env: testEnv,
3640

3741
includeSuccessfulOutput: includeSuccessfulOutput,
3842
}
@@ -53,6 +57,17 @@ func (s *testStatus) Fprintf(format string) {
5357
fmt.Fprintf(s.out, format, s.failures, s.index, s.total)
5458
}
5559

60+
// OutputCommand prints to stdout what would have been executed.
61+
func (s *testStatus) OutputCommand(ctx context.Context, test *testCase) {
62+
buf := &bytes.Buffer{}
63+
for _, env := range s.env {
64+
parts := strings.SplitN(env, "=", 2)
65+
fmt.Fprintf(buf, "%s=%q ", parts[0], parts[1])
66+
}
67+
fmt.Fprintf(buf, "%s %s %q", os.Args[0], "run-test", test.name)
68+
fmt.Fprintln(s.out, buf.String())
69+
}
70+
5671
func (s *testStatus) Run(ctx context.Context, test *testCase) {
5772
defer func() {
5873
switch {
@@ -94,6 +109,7 @@ func (s *testStatus) Run(ctx context.Context, test *testCase) {
94109

95110
test.start = time.Now()
96111
c := exec.Command(os.Args[0], "run-test", test.name)
112+
c.Env = append(os.Environ(), s.env...)
97113
s.Fprintf(fmt.Sprintf("started: (%s) %q\n\n", "%d/%d/%d", test.name))
98114
out, err := runWithTimeout(ctx, c, s.timeout)
99115
test.end = time.Now()

pkg/test/ginkgo/test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ type TestSuite struct {
6363

6464
// Init should be run once before a test in this suite is run. Not called by
6565
// methods in this package.
66-
Init func() error
66+
Init func(map[string]string) error
6767

6868
Parallelism int
6969
// The number of flakes that may occur before this test is marked as a failure.

0 commit comments

Comments
 (0)