diff --git a/changelogs/unreleased/9059-reasonerjt b/changelogs/unreleased/9059-reasonerjt new file mode 100644 index 0000000000..abdc483aae --- /dev/null +++ b/changelogs/unreleased/9059-reasonerjt @@ -0,0 +1 @@ +Add Gauge metric for BSL availability \ No newline at end of file diff --git a/go.mod b/go.mod index 92a60b2550..29f48a1f60 100644 --- a/go.mod +++ b/go.mod @@ -2,8 +2,6 @@ module github.com/vmware-tanzu/velero go 1.23.0 -toolchain go1.23.6 - require ( cloud.google.com/go/storage v1.54.0 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0 diff --git a/pkg/cmd/server/server.go b/pkg/cmd/server/server.go index 34e071430d..bc8171820e 100644 --- a/pkg/cmd/server/server.go +++ b/pkg/cmd/server/server.go @@ -590,6 +590,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string }, newPluginManager, backupStoreGetter, + s.metrics, s.logger, ) if err := bslr.SetupWithManager(s.mgr); err != nil { diff --git a/pkg/controller/backup_storage_location_controller.go b/pkg/controller/backup_storage_location_controller.go index a0c9eeb2d1..ba765607d2 100644 --- a/pkg/controller/backup_storage_location_controller.go +++ b/pkg/controller/backup_storage_location_controller.go @@ -21,6 +21,8 @@ import ( "strings" "time" + "github.com/vmware-tanzu/velero/pkg/metrics" + "github.com/pkg/errors" "github.com/sirupsen/logrus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -53,8 +55,8 @@ type backupStorageLocationReconciler struct { // replaced with fakes for testing. newPluginManager func(logrus.FieldLogger) clientmgmt.Manager backupStoreGetter persistence.ObjectBackupStoreGetter - - log logrus.FieldLogger + metrics *metrics.ServerMetrics + log logrus.FieldLogger } // NewBackupStorageLocationReconciler initialize and return a backupStorageLocationReconciler struct @@ -64,6 +66,7 @@ func NewBackupStorageLocationReconciler( defaultBackupLocationInfo storage.DefaultBackupLocationInfo, newPluginManager func(logrus.FieldLogger) clientmgmt.Manager, backupStoreGetter persistence.ObjectBackupStoreGetter, + metrics *metrics.ServerMetrics, log logrus.FieldLogger) *backupStorageLocationReconciler { return &backupStorageLocationReconciler{ ctx: ctx, @@ -71,6 +74,7 @@ func NewBackupStorageLocationReconciler( defaultBackupLocationInfo: defaultBackupLocationInfo, newPluginManager: newPluginManager, backupStoreGetter: backupStoreGetter, + metrics: metrics, log: log, } } @@ -164,8 +168,10 @@ func (r *backupStorageLocationReconciler) logReconciledPhase(defaultFound bool, switch phase { case velerov1api.BackupStorageLocationPhaseAvailable: availableBSLs = append(availableBSLs, &locationList.Items[i]) + r.metrics.RegisterBackupLocationAvailable(locationList.Items[i].Name) case velerov1api.BackupStorageLocationPhaseUnavailable: unAvailableBSLs = append(unAvailableBSLs, &locationList.Items[i]) + r.metrics.RegisterBackupLocationUnavailable(locationList.Items[i].Name) default: unknownBSLs = append(unknownBSLs, &locationList.Items[i]) } diff --git a/pkg/controller/backup_storage_location_controller_test.go b/pkg/controller/backup_storage_location_controller_test.go index 0a49389c4a..faeec54779 100644 --- a/pkg/controller/backup_storage_location_controller_test.go +++ b/pkg/controller/backup_storage_location_controller_test.go @@ -21,6 +21,8 @@ import ( "testing" "time" + "github.com/vmware-tanzu/velero/pkg/metrics" + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/pkg/errors" @@ -92,6 +94,7 @@ var _ = Describe("Backup Storage Location Reconciler", func() { }, newPluginManager: func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }, backupStoreGetter: NewFakeObjectBackupStoreGetter(backupStores), + metrics: metrics.NewServerMetrics(), log: velerotest.NewLogger(), } @@ -157,6 +160,7 @@ var _ = Describe("Backup Storage Location Reconciler", func() { }, newPluginManager: func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }, backupStoreGetter: NewFakeObjectBackupStoreGetter(backupStores), + metrics: metrics.NewServerMetrics(), log: velerotest.NewLogger(), } @@ -245,6 +249,7 @@ func TestEnsureSingleDefaultBSL(t *testing.T) { ctx: context.Background(), client: fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&test.locations).Build(), defaultBackupLocationInfo: test.defaultBackupInfo, + metrics: metrics.NewServerMetrics(), log: velerotest.NewLogger(), } defaultFound, err := r.ensureSingleDefaultBSL(test.locations) @@ -289,6 +294,7 @@ func TestBSLReconcile(t *testing.T) { ctx: context.Background(), client: fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&test.locationList).Build(), newPluginManager: func(logrus.FieldLogger) clientmgmt.Manager { return pluginManager }, + metrics: metrics.NewServerMetrics(), log: velerotest.NewLogger(), } diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 7a477e5f0c..557a0a4b87 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -47,6 +47,7 @@ const ( backupItemsErrorsGauge = "backup_items_errors" backupWarningTotal = "backup_warning_total" backupLastStatus = "backup_last_status" + backupLocationStatus = "backup_location_status_gauge" restoreTotal = "restore_total" restoreAttemptTotal = "restore_attempt_total" restoreValidationFailedTotal = "restore_validation_failed_total" @@ -77,6 +78,7 @@ const ( // Labels nodeMetricLabel = "node" podVolumeOperationLabel = "operation" + bslNameLabel = "backup_location_name" pvbNameLabel = "pod_volume_backup" scheduleLabel = "schedule" backupNameLabel = "backupName" @@ -228,6 +230,14 @@ func NewServerMetrics() *ServerMetrics { }, []string{scheduleLabel}, ), + backupLocationStatus: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metricNamespace, + Name: backupLocationStatus, + Help: "The status of backup location. A value of 1 is available, 0 is unavailable", + }, + []string{bslNameLabel}, + ), restoreTotal: prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: metricNamespace, @@ -888,3 +898,17 @@ func (m *ServerMetrics) RegisterCSISnapshotFailures(backupSchedule, backupName s c.WithLabelValues(backupSchedule, backupName).Add(float64(csiSnapshotsFailed)) } } + +// RegisterBackupLocationAvailable records the availability of a backup location. +func (m *ServerMetrics) RegisterBackupLocationAvailable(backupLocationName string) { + if g, ok := m.metrics[backupLocationStatus].(*prometheus.GaugeVec); ok { + g.WithLabelValues(backupLocationName).Set(float64(1)) + } +} + +// RegisterBackupLocationUnavailable records the availability of a backup location. +func (m *ServerMetrics) RegisterBackupLocationUnavailable(backupLocationName string) { + if g, ok := m.metrics[backupLocationStatus].(*prometheus.GaugeVec); ok { + g.WithLabelValues(backupLocationName).Set(float64(0)) + } +}