Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 100 additions & 14 deletions test/extended/cli/adm_upgrade/recommend.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"net"
"net/url"
"os"
"strconv"
"strings"
"text/template"
Expand All @@ -31,6 +32,7 @@ var _ = g.Describe("[Serial][sig-cli] oc adm upgrade recommend", g.Ordered, func
oc := exutil.NewCLIWithFramework(f).AsAdmin()
var cv *configv1.ClusterVersion
var restoreChannel, restoreUpstream bool
var caBundleFilePath string

g.BeforeAll(func() {
isMicroShift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
Expand All @@ -51,10 +53,14 @@ var _ = g.Describe("[Serial][sig-cli] oc adm upgrade recommend", g.Ordered, func
if restoreUpstream {
oc.Run("patch", "clusterversions.config.openshift.io", "version", "--type", "json", "-p", fmt.Sprintf(`[{"op": "add", "path": "/spec/upstream", "value": "%s"}]`, cv.Spec.Upstream)).Execute()
}

if caBundleFilePath != "" {
os.Remove(caBundleFilePath)
}
})

g.It("runs successfully, even without upstream OpenShift Update Service customization", func() {
_, err := oc.Run("adm", "upgrade", "recommend").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND", "true").Output()
_, err := oc.Run("adm", "upgrade", "recommend").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND", "true").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND_PRECHECK", "true").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND_ACCEPT", "true").Output()
o.Expect(err).NotTo(o.HaveOccurred())
})

Expand All @@ -65,7 +71,7 @@ var _ = g.Describe("[Serial][sig-cli] oc adm upgrade recommend", g.Ordered, func
}
restoreChannel = true

out, err := oc.Run("adm", "upgrade", "recommend").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND", "true").Output()
out, err := oc.Run("adm", "upgrade", "recommend").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND", "true").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND_PRECHECK", "true").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND_ACCEPT", "true").Output()
o.Expect(err).NotTo(o.HaveOccurred())
err = matchRegexp(out, `.*The update channel has not been configured.*`)
o.Expect(err).NotTo(o.HaveOccurred())
Expand Down Expand Up @@ -99,7 +105,7 @@ var _ = g.Describe("[Serial][sig-cli] oc adm upgrade recommend", g.Ordered, func
})

g.It("runs successfully", func() {
out, err := oc.Run("adm", "upgrade", "recommend").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND", "true").Output()
out, err := oc.Run("adm", "upgrade", "recommend").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND", "true").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND_PRECHECK", "true").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND_ACCEPT", "true").Output()
o.Expect(err).NotTo(o.HaveOccurred())
err = matchRegexp(out, `.*Upstream update service: http://.*
Channel: test-channel [(]available channels: other-channel, test-channel[)]
Expand All @@ -110,6 +116,7 @@ No updates available. You may still upgrade to a specific release image.*`)

g.Context("When the update service has conditional recommendations", func() {
var currentVersion *semver.Version
var token string

g.BeforeAll(func() {
isHyperShift, err := exutil.IsHypershift(ctx, oc.AdminConfigClient())
Expand Down Expand Up @@ -175,16 +182,45 @@ No updates available. You may still upgrade to a specific release image.*`)
}
restoreUpstream = true

defaultIngressCert, err := getDefaultIngressCertificate(ctx, oc)
o.Expect(err).NotTo(o.HaveOccurred())

kubeCerts, err := getKubernetesAPIServerCertificates(ctx, oc)
o.Expect(err).NotTo(o.HaveOccurred())

caBundleFile, err := os.CreateTemp("", "ca-bundle")
caBundleFilePath = caBundleFile.Name()
_, err = caBundleFile.WriteString(fmt.Sprintf("%s\n%s", defaultIngressCert, kubeCerts))
o.Expect(err).NotTo(o.HaveOccurred())

// alert retrieval requires a token-based kubeconfig to avoid:
// Failed to check for at least some preconditions: failed to get alerts from Thanos: no token is currently in use for this session
o.Expect(oc.Run("create").Args("serviceaccount", "test").Execute()).To(o.Succeed())
o.Expect(oc.Run("create").Args("clusterrolebinding", fmt.Sprintf("%s-test", oc.Namespace()), "--clusterrole=cluster-admin", fmt.Sprintf("--serviceaccount=%s:test", oc.Namespace())).Execute()).To(o.Succeed())
token, err = oc.Run("create").Args("token", "test").Output()
o.Expect(err).NotTo(o.HaveOccurred())

time.Sleep(16 * time.Second) // Give the CVO time to retrieve recommendations and push to status
})

g.AfterAll(func() {
// apparently ClusterRoleBindings are not automatically garbage-collected after the referenced service-account is removed (as part of namespace removal).
oc.Run("delete").Args("clusterrolebinding", fmt.Sprintf("%s-test", oc.Namespace())).Execute()
})

g.It("runs successfully when listing all updates", func() {
out, err := oc.Run("adm", "upgrade", "recommend").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND", "true").Output()
o.Expect(err).NotTo(o.HaveOccurred())
err = matchRegexp(out, `Upstream update service: http://.*
oc.WithKubeConfigCopy(func(oc *exutil.CLI) {
o.Expect(oc.Run("config", "set-credentials").Args("test", "--token", token).Execute()).To(o.Succeed())
o.Expect(oc.Run("config", "set-context").Args("--current", "--user", "test").Execute()).To(o.Succeed())

out, err := oc.Run("--certificate-authority", caBundleFilePath, "adm", "upgrade", "recommend").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND", "true").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND_PRECHECK", "true").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND_ACCEPT", "true").Output()
o.Expect(err).NotTo(o.HaveOccurred())
err = matchRegexp(out, `The following conditions found no cause for concern in updating this cluster to later releases.*

Upstream update service: http://.*
Channel: test-channel [(]available channels: other-channel, test-channel[)]

Updates to 4.[0-9]*:
Updates to 4[.][0-9]*:

Version: 4[.][0-9]*[.]0
Image: example[.]com/test@sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
Expand All @@ -195,21 +231,31 @@ Updates to 4[.][0-9]*:
VERSION *ISSUES
4[.][0-9]*[.]999 *no known issues relevant to this cluster
4[.][0-9]*[.]998 *no known issues relevant to this cluster`)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(err).NotTo(o.HaveOccurred())
})
})

g.It("runs successfully with conditional recommendations to the --version target", func() {
out, err := oc.Run("adm", "upgrade", "recommend", "--version", fmt.Sprintf("4.%d.0", currentVersion.Minor+1)).EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND", "true").Output()
o.Expect(err).NotTo(o.HaveOccurred())
err = matchRegexp(out, `Upstream update service: http://.*
oc.WithKubeConfigCopy(func(oc *exutil.CLI) {
o.Expect(oc.Run("config", "set-credentials").Args("test", "--token", token).Execute()).To(o.Succeed())
o.Expect(oc.Run("config", "set-context").Args("--current", "--user", "test").Execute()).To(o.Succeed())

out, err := oc.Run("--certificate-authority", caBundleFilePath, "adm", "upgrade", "recommend", "--version", fmt.Sprintf("4.%d.0", currentVersion.Minor+1), "--accept", "ConditionalUpdateRisk,Failing").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND", "true").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND_PRECHECK", "true").EnvVar("OC_ENABLE_CMD_UPGRADE_RECOMMEND_ACCEPT", "true").Output()

o.Expect(err).NotTo(o.HaveOccurred())
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hrm, e2e-aws-ovn-single-node-serial failed:

: [Serial][sig-cli] oc adm upgrade recommend When the update service has conditional recommendations runs successfully with conditional recommendations to the --version target [Suite:openshift/conformance/serial]	25s
{  fail [github.com/openshift/origin/test/extended/cli/adm_upgrade/recommend.go:245]: Unexpected error:
    ...
    Failing=True:
    
      Reason: ClusterOperatorNotAvailable
      Message: Cluster operator authentication is not available
      ...
      error: issues that apply to this cluster but which were not included in --accept: Failing

The test-case that covers auth availability flaked:

: [bz-apiserver-auth] clusteroperator/authentication should not change condition/Available
...15 unwelcome but acceptable clusteroperator state transitions during e2e test run...
...exception: https://issues.redhat.com/browse/OCPBUGS-20056...

Working in an exception might be tricky regular-expression handling. Trying to gauge how frequent this issue is:

/payload-aggregate periodic-ci-openshift-release-master-nightly-4.21-e2e-aws-ovn-single-node-serial 20

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of the aggregation run's 20 attempts:

So the auth functionality is pretty flaky in these single-node clusters under serial-suite load. I've pushed c68be5e -> 78ac50f, to hopefully soften this test-case enough to not trip over this issue.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With 78ac50f in place, the only serial presubmit which failed was e2e-metal-ipi-serial-2of2, on an unrelated metrics-auth context deadline exceeded issue. The 20 single-node runs are still not great, but no longer has any failures related to my test-cases:

err = matchRegexp(out, `The following conditions found no cause for concern in updating this cluster to later releases.*

Upstream update service: http://.*
Channel: test-channel [(]available channels: other-channel, test-channel[)]

Update to 4[.][0-9]*[.]0 Recommended=False:
Image: example.com/test@sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
Release URL: https://example.com/release/4[.][0-9]*[.]0
Reason: (TestRiskA|MultipleReasons)
Message: (?s:.*)This is a test risk. https://example.com/testRiskA`)
o.Expect(err).NotTo(o.HaveOccurred())
Reason: accepted (TestRiskA|MultipleReasons) via ConditionalUpdateRisk
Message: (?s:.*)This is a test risk[.] https://example.com/testRiskA
Update to 4[.][0-9]*[.]0 has no known issues relevant to this cluster other than the accepted ConditionalUpdateRisk(|,Failing).`)
o.Expect(err).NotTo(o.HaveOccurred())
})
})
})
})
Expand Down Expand Up @@ -291,3 +337,43 @@ python3 -m http.server --bind ::
Path: "graph",
}, nil
}

func getDefaultIngressCertificate(ctx context.Context, oc *exutil.CLI) (string, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is amazing!
I will try to remember the existence of these powerful functions in case I need them in the future.

defaultIngressSecretName, err := oc.Run("get").Args("--namespace=openshift-ingress-operator", "-o", "jsonpath={.spec.defaultCertificate.name}", "ingresscontroller.operator.openshift.io", "default").Output()
if err != nil {
return "", err
}

if defaultIngressSecretName == "" {
defaultIngressSecretName = "router-certs-default"
}

ingressNamespace := "openshift-ingress"
defaultIngressCert, err := oc.Run("extract").Args("--namespace", ingressNamespace, fmt.Sprintf("secret/%s", defaultIngressSecretName), "--keys=tls.crt", "--to=-").Output()
if err != nil {
return "", err
}
Comment on lines +342 to +355
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hehe given my push for "let's stop using oc to test CVO" in threads like this I think I'll need to voice here as well; do these two need to be oc calls? I think that at least the first get would be more appropriate to do thought client-go and get a typed struct instead of reading elements with jsonpath of --keys.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

they don't need to be oc, but given that we're in the test/extended/cli section, using oc seemed ok. I'm not strongly opinionated for this setup though; they could certainly be ported to the structured client-go if they were going to be used outside of oc CLI testing.

defaultIngressCert = fmt.Sprintf("%s\n", defaultIngressCert) // ensure a trailing newline, even if the earlier Output() stripped trailing newlines
framework.Logf("default ingress certificate from the %s secret in the %s namespace: %q", defaultIngressSecretName, ingressNamespace, fmt.Sprintf("%s...", defaultIngressCert[:30]))
return defaultIngressCert, nil
}

func getKubernetesAPIServerCertificates(ctx context.Context, oc *exutil.CLI) (string, error) {
kubeNamespace := "openshift-kube-apiserver"
secrets, err := oc.AdminKubeClient().CoreV1().Secrets(kubeNamespace).List(ctx, metav1.ListOptions{})
if err != nil {
return "", err
}

certs := make([]string, 0, len(secrets.Items))
for _, secret := range secrets.Items {
if secret.Type != corev1.SecretTypeTLS {
continue
}
certs = append(certs, string(secret.Data["tls.crt"]))
}

kubeCerts := strings.Join(certs, "")
framework.Logf("default Kubernetes certificates from TLS secrets in the %s namespace: %q", kubeNamespace, fmt.Sprintf("%s...", kubeCerts[:30]))
return kubeCerts, nil
}