Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion cmd/install/assets/hypershift_operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@ type HyperShiftOperatorDeployment struct {
IncludeVersion bool
UWMTelemetry bool
RHOBSMonitoring bool
CVOPrometheusURL string
MonitoringDashboards bool
CertRotationScale time.Duration
EnableCVOManagementClusterMetricsAccess bool
Expand Down Expand Up @@ -616,6 +617,13 @@ func (o HyperShiftOperatorDeployment) Build() *appsv1.Deployment {
})
}

if o.CVOPrometheusURL != "" {
envVars = append(envVars, corev1.EnvVar{
Name: config.CVOPrometheusURLEnvVar,
Value: o.CVOPrometheusURL,
})
}

if o.MonitoringDashboards {
envVars = append(envVars, corev1.EnvVar{
Name: "MONITORING_DASHBOARDS",
Expand Down Expand Up @@ -985,6 +993,7 @@ func (o HyperShiftOperatorServiceAccount) Build() *corev1.ServiceAccount {

// HyperShiftOperatorClusterRole carries the install-time options consumed by
// its Build method, which returns the operator's *rbacv1.ClusterRole.
type HyperShiftOperatorClusterRole struct {
// EnableCVOManagementClusterMetricsAccess, when true, makes Build append a
// policy rule for the "metrics.k8s.io" API group to the role.
EnableCVOManagementClusterMetricsAccess bool
// RHOBSMonitoring mirrors the installer's --rhobs-monitoring option; Build
// treats it as an alternative trigger for the same "metrics.k8s.io" rule.
RHOBSMonitoring bool
// ManagedService is copied from the installer's ManagedService option.
// NOTE(review): how Build uses it is not visible in this hunk — confirm.
ManagedService string
}

Expand Down Expand Up @@ -1256,7 +1265,7 @@ func (o HyperShiftOperatorClusterRole) Build() *rbacv1.ClusterRole {
},
},
}
if o.EnableCVOManagementClusterMetricsAccess {
if o.EnableCVOManagementClusterMetricsAccess || o.RHOBSMonitoring {
role.Rules = append(role.Rules,
rbacv1.PolicyRule{
APIGroups: []string{"metrics.k8s.io"},
Expand Down
10 changes: 9 additions & 1 deletion cmd/install/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ type Options struct {
WaitUntilAvailable bool
WaitUntilEstablished bool
RHOBSMonitoring bool
CVOPrometheusURL string
SLOsAlerts bool
MonitoringDashboards bool
CertRotationScale time.Duration
Expand Down Expand Up @@ -178,6 +179,10 @@ func (o *Options) Validate() error {
errs = append(errs, fmt.Errorf("when invoking this command with the --rhobs-monitoring flag, the --enable-cvo-management-cluster-metrics-access flag is not supported "))
}

if len(o.CVOPrometheusURL) > 0 && !o.RHOBSMonitoring && !o.EnableCVOManagementClusterMetricsAccess {
errs = append(errs, fmt.Errorf("--cvo-prometheus-url requires either --rhobs-monitoring or --enable-cvo-management-cluster-metrics-access to be enabled"))
}

if len(o.ManagedService) > 0 && o.ManagedService != hyperv1.AroHCP {
errs = append(errs, fmt.Errorf("not a valid managed service type: %s", o.ManagedService))
}
Expand Down Expand Up @@ -246,7 +251,8 @@ func NewCommand() *cobra.Command {
cmd.PersistentFlags().BoolVar(&opts.EnableCVOManagementClusterMetricsAccess, "enable-cvo-management-cluster-metrics-access", opts.EnableCVOManagementClusterMetricsAccess, "If true, the hosted CVO will have access to the management cluster metrics server to evaluate conditional updates (supported for OpenShift management clusters)")
cmd.Flags().BoolVar(&opts.WaitUntilAvailable, "wait-until-available", opts.WaitUntilAvailable, "If true, pauses installation until hypershift operator has been rolled out and its webhook service is available (if installing the webhook)")
cmd.Flags().BoolVar(&opts.WaitUntilEstablished, "wait-until-crds-established", opts.WaitUntilEstablished, "If true, pauses installation until all custom resource definitions are established before applying other manifests.")
cmd.PersistentFlags().BoolVar(&opts.RHOBSMonitoring, "rhobs-monitoring", opts.RHOBSMonitoring, "If true, HyperShift will generate and use the RHOBS version of monitoring resources (ServiceMonitors, PodMonitors, etc)")
cmd.PersistentFlags().BoolVar(&opts.RHOBSMonitoring, "rhobs-monitoring", opts.RHOBSMonitoring, "If true, HyperShift will generate and use the RHOBS version of monitoring resources (ServiceMonitors, PodMonitors, etc). For ROSA HCP, this also enables the Cluster Version Operator to query the RHOBS Prometheus for conditional update evaluation. Use --cvo-prometheus-url to override the default Prometheus endpoint.")
cmd.PersistentFlags().StringVar(&opts.CVOPrometheusURL, "cvo-prometheus-url", opts.CVOPrometheusURL, "Prometheus URL for the Cluster Version Operator to query metrics for conditional update risk evaluation. Only effective when --rhobs-monitoring or --enable-cvo-management-cluster-metrics-access is enabled. If not specified, defaults to the RHOBS monitoring stack for ROSA HCP, or the Thanos querier for self-managed HyperShift.")
cmd.PersistentFlags().BoolVar(&opts.SLOsAlerts, "slos-alerts", opts.SLOsAlerts, "If true, HyperShift will generate and use the prometheus alerts for monitoring HostedCluster and NodePools")
cmd.PersistentFlags().BoolVar(&opts.MonitoringDashboards, "monitoring-dashboards", opts.MonitoringDashboards, "If true, HyperShift will generate a monitoring dashboard for every HostedCluster that it creates")
cmd.PersistentFlags().DurationVar(&opts.CertRotationScale, "cert-rotation-scale", opts.CertRotationScale, "The scaling factor for certificate rotation. It is not supported to set this to anything other than 24h.")
Expand Down Expand Up @@ -787,6 +793,7 @@ func setupOperatorResources(opts Options, userCABundleCM *corev1.ConfigMap, trus
IncludeVersion: !opts.Template,
UWMTelemetry: opts.EnableUWMTelemetryRemoteWrite,
RHOBSMonitoring: opts.RHOBSMonitoring,
CVOPrometheusURL: opts.CVOPrometheusURL,
MonitoringDashboards: opts.MonitoringDashboards,
CertRotationScale: opts.CertRotationScale,
EnableCVOManagementClusterMetricsAccess: opts.EnableCVOManagementClusterMetricsAccess,
Expand Down Expand Up @@ -946,6 +953,7 @@ func setupRBAC(opts Options, operatorNamespace *corev1.Namespace) (*corev1.Servi

operatorClusterRole := assets.HyperShiftOperatorClusterRole{
EnableCVOManagementClusterMetricsAccess: opts.EnableCVOManagementClusterMetricsAccess,
RHOBSMonitoring: opts.RHOBSMonitoring,
ManagedService: opts.ManagedService,
}.Build()
objects = append(objects, operatorClusterRole)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func NewCVOParams(hcp *hyperv1.HostedControlPlane, releaseImageProvider imagepro
p.FeatureSet = hcp.Spec.Configuration.FeatureGate.FeatureSet
}

if enableCVOManagementClusterMetricsAccess {
if IsManagementClusterMetricsAccessEnabled(hcp, enableCVOManagementClusterMetricsAccess) {
p.DeploymentConfig.AdditionalLabels = map[string]string{
config.NeedMetricsServerAccessLabel: "true",
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cvo
import (
"encoding/json"
"fmt"
"os"
"path"
"strings"

Expand All @@ -11,10 +12,12 @@ import (
"github.com/openshift/hypershift/control-plane-operator/controllers/hostedcontrolplane/manifests"
"github.com/openshift/hypershift/hypershift-operator/controllers/manifests/controlplaneoperator"
"github.com/openshift/hypershift/support/api"
"github.com/openshift/hypershift/support/awsutil"
"github.com/openshift/hypershift/support/capabilities"
"github.com/openshift/hypershift/support/certs"
"github.com/openshift/hypershift/support/config"
"github.com/openshift/hypershift/support/metrics"
"github.com/openshift/hypershift/support/rhobsmonitoring"
"github.com/openshift/hypershift/support/util"

configv1 "github.com/openshift/api/config/v1"
Expand Down Expand Up @@ -120,7 +123,18 @@ func cvoLabels() map[string]string {

var port int32 = 8443

func ReconcileDeployment(deployment *appsv1.Deployment, ownerRef config.OwnerRef, deploymentConfig config.DeploymentConfig, cvoImage, dataPlaneReleaseImage, cliImage, availabilityProberImage, clusterID string, updateService configv1.URL, platformType hyperv1.PlatformType, oauthEnabled, enableCVOManagementClusterMetricsAccess bool, featureSet configv1.FeatureSet, caps *hyperv1.Capabilities) error {
// IsManagementClusterMetricsAccessEnabled reports whether the CVO should be
// wired up to query a metrics endpoint that lives on the management cluster.
//
// It returns true in either of these cases:
//   - enableCVOManagementClusterMetricsAccess is set (self-managed HyperShift,
//     where the CVO talks to the Thanos Querier in openshift-monitoring), or
//   - the environment variable named by rhobsmonitoring.EnvironmentVariable is
//     exactly "1" and awsutil.IsROSAHCP(hcp) holds (ROSA HCP, where the CVO
//     talks to the RHOBS Prometheus in openshift-observability-operator).
func IsManagementClusterMetricsAccessEnabled(hcp *hyperv1.HostedControlPlane, enableCVOManagementClusterMetricsAccess bool) bool {
	// The explicit flag wins outright; nothing else needs to be consulted.
	if enableCVOManagementClusterMetricsAccess {
		return true
	}

	// Without RHOBS monitoring switched on there is no second path to access.
	if os.Getenv(rhobsmonitoring.EnvironmentVariable) != "1" {
		return false
	}

	return awsutil.IsROSAHCP(hcp)
}

func ReconcileDeployment(deployment *appsv1.Deployment, hcp *hyperv1.HostedControlPlane, ownerRef config.OwnerRef, deploymentConfig config.DeploymentConfig, cvoImage, dataPlaneReleaseImage, cliImage, availabilityProberImage, clusterID string, updateService configv1.URL, platformType hyperv1.PlatformType, oauthEnabled, enableCVOManagementClusterMetricsAccess bool, featureSet configv1.FeatureSet, caps *hyperv1.Capabilities) error {
ownerRef.ApplyTo(deployment)

// preserve existing resource requirements for main CVO container
Expand Down Expand Up @@ -163,6 +177,8 @@ func ReconcileDeployment(deployment *appsv1.Deployment, ownerRef config.OwnerRef
return err
}

enableMetricsAccess := IsManagementClusterMetricsAccessEnabled(hcp, enableCVOManagementClusterMetricsAccess)

deployment.Spec = appsv1.DeploymentSpec{
Selector: selector,
Template: corev1.PodTemplateSpec{
Expand All @@ -176,7 +192,7 @@ func ReconcileDeployment(deployment *appsv1.Deployment, ownerRef config.OwnerRef
util.BuildContainer(cvoContainerBootstrap(), buildCVOContainerBootstrap(cliImage, clusterVersionJSON)),
},
Containers: []corev1.Container{
util.BuildContainer(cvoContainerMain(), buildCVOContainerMain(cvoImage, dataPlaneReleaseImage, deployment.Namespace, updateService, enableCVOManagementClusterMetricsAccess)),
util.BuildContainer(cvoContainerMain(), buildCVOContainerMain(cvoImage, dataPlaneReleaseImage, deployment.Namespace, updateService, hcp, enableMetricsAccess)),
},
Volumes: []corev1.Volume{
util.BuildVolume(cvoVolumePayload(), buildCVOVolumePayload),
Expand All @@ -188,7 +204,13 @@ func ReconcileDeployment(deployment *appsv1.Deployment, ownerRef config.OwnerRef
},
}
deployment.Spec.Template.Spec.AutomountServiceAccountToken = ptr.To(false)
if enableCVOManagementClusterMetricsAccess {
if enableMetricsAccess {
// Set annotation to enable automountServiceAccountToken for metrics endpoint access
if deployment.Spec.Template.Annotations == nil {
deployment.Spec.Template.Annotations = map[string]string{}
}
deployment.Spec.Template.Annotations[config.NeedMetricsServerAccessLabel] = "true"

deployment.Spec.Template.Spec.ServiceAccountName = manifests.ClusterVersionOperatorServiceAccount("").Name
deployment.Spec.Template.Spec.AutomountServiceAccountToken = ptr.To(true)
}
Expand Down Expand Up @@ -394,7 +416,7 @@ done
return fmt.Sprintf(scriptTemplate, payloadDir, string(clusterVersionJSON))
}

func buildCVOContainerMain(cvoImage, dataPlaneReleaseImage, namespace string, updateService configv1.URL, enableCVOManagementClusterMetricsAccess bool) func(c *corev1.Container) {
func buildCVOContainerMain(cvoImage, dataPlaneReleaseImage, namespace string, updateService configv1.URL, hcp *hyperv1.HostedControlPlane, enableMetricsAccess bool) func(c *corev1.Container) {
cpath := func(vol, file string) string {
return path.Join(volumeMounts.Path(cvoContainerMain().Name, vol), file)
}
Expand All @@ -418,10 +440,23 @@ func buildCVOContainerMain(cvoImage, dataPlaneReleaseImage, namespace string, up
if updateService != "" {
c.Args = append(c.Args, "--update-service", string(updateService))
}
if enableCVOManagementClusterMetricsAccess {
if enableMetricsAccess {
c.Args = append(c.Args, "--use-dns-for-services=true")

// Determine metrics URL based on monitoring stack.
cvoPrometheusURL := os.Getenv(config.CVOPrometheusURLEnvVar)
if cvoPrometheusURL == "" {
if os.Getenv(rhobsmonitoring.EnvironmentVariable) == "1" && awsutil.IsROSAHCP(hcp) {
// RHOBS monitoring stack (ROSA HCP) - currently uses HTTP without TLS
cvoPrometheusURL = "http://hypershift-monitoring-stack-prometheus.openshift-observability-operator.svc:9090"
} else {
// Self-managed HyperShift - OCP Thanos Query uses HTTPS with service CA
cvoPrometheusURL = fmt.Sprintf("https://thanos-querier.openshift-monitoring.svc:9092?namespace=%s", namespace)
}
}

c.Args = append(c.Args, "--metrics-ca-bundle-file=/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt")
c.Args = append(c.Args, fmt.Sprintf("--metrics-url=https://thanos-querier.openshift-monitoring.svc:9092?namespace=%s", namespace))
c.Args = append(c.Args, fmt.Sprintf("--metrics-url=%s", cvoPrometheusURL))
}
c.Env = []corev1.EnvVar{
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3793,7 +3793,10 @@ func (r *HostedControlPlaneReconciler) reconcileClusterVersionOperator(ctx conte
}
}

if r.EnableCVOManagementClusterMetricsAccess {
// Enable RBAC for metrics access when either:
// - Self-managed HyperShift with explicit flag
// - ROSA HCP with RHOBS monitoring enabled
if cvo.IsManagementClusterMetricsAccessEnabled(hcp, r.EnableCVOManagementClusterMetricsAccess) {
sa := manifests.ClusterVersionOperatorServiceAccount(hcp.Namespace)
if _, err := createOrUpdate(ctx, r.Client, sa, func() error {
return cvo.ReconcileServiceAccount(sa, p.OwnerRef)
Expand Down Expand Up @@ -3842,7 +3845,7 @@ func (r *HostedControlPlaneReconciler) reconcileClusterVersionOperator(ctx conte

deployment := manifests.ClusterVersionOperatorDeployment(hcp.Namespace)
if _, err := createOrUpdate(ctx, r, deployment, func() error {
return cvo.ReconcileDeployment(deployment, p.OwnerRef, p.DeploymentConfig, cvoImage, dataPlaneReleaseImage, p.CLIImage, p.AvailabilityProberImage, p.ClusterID, hcp.Spec.UpdateService, p.PlatformType, util.HCPOAuthEnabled(hcp), r.EnableCVOManagementClusterMetricsAccess, p.FeatureSet, hcp.Spec.Capabilities)
return cvo.ReconcileDeployment(deployment, hcp, p.OwnerRef, p.DeploymentConfig, cvoImage, dataPlaneReleaseImage, p.CLIImage, p.AvailabilityProberImage, p.ClusterID, hcp.Spec.UpdateService, p.PlatformType, util.HCPOAuthEnabled(hcp), r.EnableCVOManagementClusterMetricsAccess, p.FeatureSet, hcp.Spec.Capabilities)
}); err != nil {
return fmt.Errorf("failed to reconcile cluster version operator deployment: %w", err)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
package cvo

import (
"os"

hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
oapiv2 "github.com/openshift/hypershift/control-plane-operator/controllers/hostedcontrolplane/v2/oapi"
"github.com/openshift/hypershift/support/awsutil"
component "github.com/openshift/hypershift/support/controlplane-component"
"github.com/openshift/hypershift/support/rhobsmonitoring"
"github.com/openshift/hypershift/support/util"
)

Expand Down Expand Up @@ -67,6 +71,13 @@ func NewComponent(enableCVOManagementClusterMetricsAccess bool) component.Contro
Build()
}

// isManagementClusterMetricsAccessEnabled reports whether the CVO needs access
// to a metrics endpoint on the management cluster. Two scenarios enable it:
//   - Self-managed HyperShift: Thanos Querier in the openshift-monitoring
//     namespace (controlled by the enableCVOManagementClusterMetricsAccess flag).
//   - ROSA HCP: RHOBS Prometheus in the openshift-observability-operator
//     namespace (the environment variable named by
//     rhobsmonitoring.EnvironmentVariable is "1" and awsutil.IsROSAHCP reports
//     true for cpContext.HCP).
//
// The flag is checked first, so the environment and the HCP are only consulted
// when the flag is off.
func (cvo *clusterVersionOperator) isManagementClusterMetricsAccessEnabled(cpContext component.WorkloadContext) bool {
	// A single return: an earlier flag-only return would make the RHOBS check
	// below unreachable and silently disable the ROSA HCP scenario.
	return cvo.enableCVOManagementClusterMetricsAccess ||
		(os.Getenv(rhobsmonitoring.EnvironmentVariable) == "1" && awsutil.IsROSAHCP(cpContext.HCP))
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@ package cvo
import (
"encoding/json"
"fmt"
"os"
"path"
"strings"

hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
"github.com/openshift/hypershift/control-plane-operator/controllers/hostedcontrolplane/common"
hyperapi "github.com/openshift/hypershift/support/api"
"github.com/openshift/hypershift/support/awsutil"
"github.com/openshift/hypershift/support/capabilities"
"github.com/openshift/hypershift/support/config"
component "github.com/openshift/hypershift/support/controlplane-component"
"github.com/openshift/hypershift/support/rhobsmonitoring"
"github.com/openshift/hypershift/support/util"

configv1 "github.com/openshift/api/config/v1"
Expand All @@ -27,11 +30,20 @@ import (
)

func (cvo *clusterVersionOperator) adaptDeployment(cpContext component.WorkloadContext, deployment *appsv1.Deployment) error {
if cvo.enableCVOManagementClusterMetricsAccess {
enableMetricsAccess := cvo.isManagementClusterMetricsAccessEnabled(cpContext)

if enableMetricsAccess {
if deployment.Spec.Template.Labels == nil {
deployment.Spec.Template.Labels = map[string]string{}
}
deployment.Spec.Template.Labels[config.NeedMetricsServerAccessLabel] = "true"

// Set annotation to enable automountServiceAccountToken for metrics endpoint access
if deployment.Spec.Template.Annotations == nil {
deployment.Spec.Template.Annotations = map[string]string{}
}
deployment.Spec.Template.Annotations[config.NeedMetricsServerAccessLabel] = "true"

deployment.Spec.Template.Spec.ServiceAccountName = ComponentName
}

Expand Down Expand Up @@ -95,10 +107,24 @@ func (cvo *clusterVersionOperator) adaptDeployment(cpContext component.WorkloadC
if updateService := cpContext.HCP.Spec.UpdateService; updateService != "" {
c.Args = append(c.Args, "--update-service", string(updateService))
}
if cvo.enableCVOManagementClusterMetricsAccess {

if enableMetricsAccess {
c.Args = append(c.Args, "--use-dns-for-services=true")

// Determine metrics URL based on monitoring stack.
cvoPrometheusURL := os.Getenv(config.CVOPrometheusURLEnvVar)
if cvoPrometheusURL == "" {
if os.Getenv(rhobsmonitoring.EnvironmentVariable) == "1" && awsutil.IsROSAHCP(cpContext.HCP) {
// RHOBS monitoring stack (ROSA HCP) - currently uses HTTP without TLS
cvoPrometheusURL = "http://hypershift-monitoring-stack-prometheus.openshift-observability-operator.svc:9090"
} else {
// Self-managed HyperShift - OCP Thanos Query uses HTTPS with service CA
cvoPrometheusURL = fmt.Sprintf("https://thanos-querier.openshift-monitoring.svc:9092?namespace=%s", cpContext.HCP.Namespace)
}
}

c.Args = append(c.Args, "--metrics-ca-bundle-file=/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt")
c.Args = append(c.Args, fmt.Sprintf("--metrics-url=https://thanos-querier.openshift-monitoring.svc:9092?namespace=%s", cpContext.HCP.Namespace))
c.Args = append(c.Args, fmt.Sprintf("--metrics-url=%s", cvoPrometheusURL))
}
})

Expand Down
Loading