diff --git a/changelogs/unreleased/8581-kaovilai b/changelogs/unreleased/8581-kaovilai new file mode 100644 index 0000000000..6e7ab78df5 --- /dev/null +++ b/changelogs/unreleased/8581-kaovilai @@ -0,0 +1 @@ +Configurable Kopia Maintenance Interval. backup-repository-configmap adds a configurable `fullMaintenanceInterval` option, where fastGC (12 hours) and eagerGC (6 hours) allow for faster removal of deleted velero backups from kopia repo. diff --git a/pkg/controller/backup_repository_controller_test.go b/pkg/controller/backup_repository_controller_test.go index e2287f72e6..f26a221b23 100644 --- a/pkg/controller/backup_repository_controller_test.go +++ b/pkg/controller/backup_repository_controller_test.go @@ -694,7 +694,7 @@ func TestGetBackupRepositoryConfig(t *testing.T) { Namespace: velerov1api.DefaultNamespace, }, Data: map[string]string{ - "fake-repo-type": "{\"cacheLimitMB\": 1000, \"enableCompression\": true}", + "fake-repo-type": "{\"cacheLimitMB\": 1000, \"enableCompression\": true, \"fullMaintenanceInterval\": \"fastGC\"}", "fake-repo-type-1": "{\"cacheLimitMB\": 1, \"enableCompression\": false}", }, } @@ -744,8 +744,9 @@ func TestGetBackupRepositoryConfig(t *testing.T) { configWithData, }, expectedResult: map[string]string{ - "cacheLimitMB": "1000", - "enableCompression": "true", + "cacheLimitMB": "1000", + "enableCompression": "true", + "fullMaintenanceInterval": "fastGC", }, }, } diff --git a/pkg/repository/provider/unified_repo.go b/pkg/repository/provider/unified_repo.go index bdd909f206..251845dacb 100644 --- a/pkg/repository/provider/unified_repo.go +++ b/pkg/repository/provider/unified_repo.go @@ -506,6 +506,10 @@ func getStorageCredentials(backupLocation *velerov1api.BackupStorageLocation, cr return result, nil } +// Translates user specified options (backupRepoConfig) to internal parameters +// so we would accept only the options that are well defined in the internal system. +// Users' inputs should not be treated as safe any time. 
+// We remove the unnecessary parameters and keep the modules/logics below safe func getStorageVariables(backupLocation *velerov1api.BackupStorageLocation, repoBackend string, repoName string, backupRepoConfig map[string]string) (map[string]string, error) { result := make(map[string]string) @@ -576,9 +580,17 @@ func getStorageVariables(backupLocation *velerov1api.BackupStorageLocation, repo result[udmrepo.StoreOptionOssRegion] = strings.Trim(region, "/") result[udmrepo.StoreOptionFsPath] = config["fspath"] + // We remove the unnecessary parameters and keep the modules/logics below safe if backupRepoConfig != nil { - if v, found := backupRepoConfig[udmrepo.StoreOptionCacheLimit]; found { - result[udmrepo.StoreOptionCacheLimit] = v + // range of valid params to keep, everything else will be discarded. + validParams := []string{ + udmrepo.StoreOptionCacheLimit, + udmrepo.StoreOptionKeyFullMaintenanceInterval, + } + for _, param := range validParams { + if v, found := backupRepoConfig[param]; found { + result[param] = v + } } } diff --git a/pkg/repository/udmrepo/kopialib/lib_repo.go b/pkg/repository/udmrepo/kopialib/lib_repo.go index d4e1f88133..3937657aa9 100644 --- a/pkg/repository/udmrepo/kopialib/lib_repo.go +++ b/pkg/repository/udmrepo/kopialib/lib_repo.go @@ -600,6 +600,32 @@ func writeInitParameters(ctx context.Context, repoOption udmrepo.RepoOptions, lo logger.Infof("Quick maintenance interval change from %v to %v", p.QuickCycle.Interval, overwriteQuickMaintainInterval) p.QuickCycle.Interval = overwriteQuickMaintainInterval } + // the repoOption.StorageOptions are set via + // udmrepo.WithStoreOptions -> udmrepo.GetStoreOptions (interface) + // -> pkg/repository/provider.GetStoreOptions(param interface{}) -> pkg/repository/provider.getStorageVariables(..., backupRepoConfig) + // where backupRepoConfig comes from param.(RepoParam).BackupRepo.Spec.RepositoryConfig map[string]string + // where RepositoryConfig comes from 
pkg/controller/getBackupRepositoryConfig(...) + // where it gets a configMap name from pkg/cmd/server/config/Config.BackupRepoConfig + // which gets set via velero server flag `backup-repository-configmap` "The name of ConfigMap containing backup repository configurations." + // and data stored as json under ConfigMap.Data[repoType] where repoType is BackupRepository.Spec.RepositoryType: either kopia or restic + // repoOption.StorageOptions[udmrepo.StoreOptionKeyFullMaintenanceInterval] would for example look like + // configMapName.data.kopia: {"fullMaintenanceInterval": "eagerGC"} + fullMaintIntervalOption := udmrepo.FullMaintenanceIntervalOptions(repoOption.StorageOptions[udmrepo.StoreOptionKeyFullMaintenanceInterval]) + priorMaintInterval := p.FullCycle.Interval + switch fullMaintIntervalOption { + case udmrepo.FastGC: + p.FullCycle.Interval = udmrepo.FastGCInterval + case udmrepo.EagerGC: + p.FullCycle.Interval = udmrepo.EagerGCInterval + case udmrepo.NormalGC: + p.FullCycle.Interval = udmrepo.NormalGCInterval + case "": // do nothing + default: + return errors.Errorf("invalid full maintenance interval option %s", fullMaintIntervalOption) + } + if priorMaintInterval != p.FullCycle.Interval { + logger.Infof("Full maintenance interval change from %v to %v", priorMaintInterval, p.FullCycle.Interval) + } p.Owner = r.ClientOptions().UsernameAtHost() diff --git a/pkg/repository/udmrepo/kopialib/lib_repo_test.go b/pkg/repository/udmrepo/kopialib/lib_repo_test.go index a6382dbc80..acdf890b4a 100644 --- a/pkg/repository/udmrepo/kopialib/lib_repo_test.go +++ b/pkg/repository/udmrepo/kopialib/lib_repo_test.go @@ -24,6 +24,7 @@ import ( "time" "github.com/kopia/kopia/repo" + "github.com/kopia/kopia/repo/maintenance" "github.com/kopia/kopia/repo/manifest" "github.com/kopia/kopia/repo/object" "github.com/pkg/errors" @@ -264,6 +265,9 @@ func TestMaintain(t *testing.T) { func TestWriteInitParameters(t *testing.T) { var directRpo *repomocks.DirectRepository + 
assertFullMaintIntervalEqual := func(expected, actual *maintenance.Params) bool { + return assert.Equal(t, expected.FullCycle.Interval, actual.FullCycle.Interval) + } testCases := []struct { name string repoOptions udmrepo.RepoOptions @@ -272,7 +276,11 @@ func TestWriteInitParameters(t *testing.T) { repoOpen func(context.Context, string, string, *repo.Options) (repo.Repository, error) newRepoWriterError error replaceManifestError error - expectedErr string + // expected replacemanifest params to be received by maintenance.SetParams, and therefore writeInitParameters + expectedReplaceManifestsParams *maintenance.Params + // allows for asserting only certain fields are set as expected + assertReplaceManifestsParams func(*maintenance.Params, *maintenance.Params) bool + expectedErr string }{ { name: "repo open fail, repo not exist", @@ -323,6 +331,61 @@ func TestWriteInitParameters(t *testing.T) { replaceManifestError: errors.New("fake-replace-manifest-error"), expectedErr: "error to init write repo parameters: error to set maintenance params: put manifest: fake-replace-manifest-error", }, + { + name: "repo with maintenance interval has expected params", + repoOptions: udmrepo.RepoOptions{ + ConfigFilePath: "/tmp", + StorageOptions: map[string]string{ + udmrepo.StoreOptionKeyFullMaintenanceInterval: string(udmrepo.FastGC), + }, + }, + repoOpen: func(context.Context, string, string, *repo.Options) (repo.Repository, error) { + return directRpo, nil + }, + returnRepo: new(repomocks.DirectRepository), + returnRepoWriter: new(repomocks.DirectRepositoryWriter), + expectedReplaceManifestsParams: &maintenance.Params{ + FullCycle: maintenance.CycleParams{ + Interval: udmrepo.FastGCInterval, + }, + }, + assertReplaceManifestsParams: assertFullMaintIntervalEqual, + }, + { + name: "repo with empty maintenance interval has expected params", + repoOptions: udmrepo.RepoOptions{ + ConfigFilePath: "/tmp", + StorageOptions: map[string]string{ + 
udmrepo.StoreOptionKeyFullMaintenanceInterval: string(""), + }, + }, + repoOpen: func(context.Context, string, string, *repo.Options) (repo.Repository, error) { + return directRpo, nil + }, + returnRepo: new(repomocks.DirectRepository), + returnRepoWriter: new(repomocks.DirectRepositoryWriter), + expectedReplaceManifestsParams: &maintenance.Params{ + FullCycle: maintenance.CycleParams{ + Interval: udmrepo.NormalGCInterval, + }, + }, + assertReplaceManifestsParams: assertFullMaintIntervalEqual, + }, + { + name: "repo with invalid maintenance interval has expected errors", + repoOptions: udmrepo.RepoOptions{ + ConfigFilePath: "/tmp", + StorageOptions: map[string]string{ + udmrepo.StoreOptionKeyFullMaintenanceInterval: string("foo"), + }, + }, + repoOpen: func(context.Context, string, string, *repo.Options) (repo.Repository, error) { + return directRpo, nil + }, + returnRepo: new(repomocks.DirectRepository), + returnRepoWriter: new(repomocks.DirectRepositoryWriter), + expectedErr: "error to init write repo parameters: invalid full maintenance interval option foo", + }, } for _, tc := range testCases { @@ -346,7 +409,13 @@ func TestWriteInitParameters(t *testing.T) { if tc.returnRepoWriter != nil { tc.returnRepoWriter.On("Close", mock.Anything).Return(nil) - tc.returnRepoWriter.On("ReplaceManifests", mock.Anything, mock.Anything, mock.Anything).Return(manifest.ID(""), tc.replaceManifestError) + if tc.replaceManifestError != nil { + tc.returnRepoWriter.On("ReplaceManifests", mock.Anything, mock.Anything, mock.Anything).Return(manifest.ID(""), tc.replaceManifestError) + } + if tc.expectedReplaceManifestsParams != nil { + tc.returnRepoWriter.On("ReplaceManifests", mock.AnythingOfType("context.backgroundCtx"), mock.AnythingOfType("map[string]string"), mock.AnythingOfType("*maintenance.Params")).Return(manifest.ID(""), nil) + tc.returnRepoWriter.On("Flush", mock.Anything).Return(nil) + } } err := writeInitParameters(ctx, tc.repoOptions, logger) @@ -356,6 +425,11 @@ func 
TestWriteInitParameters(t *testing.T) { } else { assert.EqualError(t, err, tc.expectedErr) } + if tc.expectedReplaceManifestsParams != nil { + actualReplaceManifestsParams, converted := tc.returnRepoWriter.Calls[0].Arguments.Get(2).(*maintenance.Params) + assert.True(t, converted) + tc.assertReplaceManifestsParams(tc.expectedReplaceManifestsParams, actualReplaceManifestsParams) + } }) } } diff --git a/pkg/repository/udmrepo/repo_options.go b/pkg/repository/udmrepo/repo_options.go index ad4c597f92..efddfdcd10 100644 --- a/pkg/repository/udmrepo/repo_options.go +++ b/pkg/repository/udmrepo/repo_options.go @@ -20,6 +20,7 @@ import ( "os" "path/filepath" "strings" + "time" ) const ( @@ -70,8 +71,19 @@ const ( ThrottleOptionListOps = "listOPS" ThrottleOptionUploadBytes = "uploadBytes" ThrottleOptionDownloadBytes = "downloadBytes" + // FullMaintenanceInterval will overwrite kopia maintenance interval + // options are fastGC for 12 hours, eagerGC for 6 hours, normalGC for 24 hours + StoreOptionKeyFullMaintenanceInterval = "fullMaintenanceInterval" + FastGC FullMaintenanceIntervalOptions = "fastGC" + FastGCInterval time.Duration = 12 * time.Hour + EagerGC FullMaintenanceIntervalOptions = "eagerGC" + EagerGCInterval time.Duration = 6 * time.Hour + NormalGC FullMaintenanceIntervalOptions = "normalGC" + NormalGCInterval time.Duration = 24 * time.Hour ) +type FullMaintenanceIntervalOptions string + const ( defaultUsername = "default" defaultDomain = "default" diff --git a/site/content/docs/main/backup-repository-configuration.md b/site/content/docs/main/backup-repository-configuration.md index 46301c54eb..fd6cf0b781 100644 --- a/site/content/docs/main/backup-repository-configuration.md +++ b/site/content/docs/main/backup-repository-configuration.md @@ -30,7 +30,8 @@ metadata: data: : | { - "cacheLimitMB": 2048 + "cacheLimitMB": 2048, + "fullMaintenanceInterval": "fastGC" } : | { @@ -49,6 +50,14 @@ Below is the supported configurations by Velero and the specific backup reposito 
***Kopia repository:*** `cacheLimitMB`: specifies the size limit(in MB) for the local data cache. The more data is cached locally, the less data may be downloaded from the backup storage, so the better performance may be achieved. Practically, you can specify any size that is smaller than the free space so that the disk space won't run out. This parameter is for repository connection, that is, you could change it before connecting to the repository. E.g., before a backup/restore/maintenance. +`fullMaintenanceInterval`: The full maintenance interval defaults to the kopia default of 24 hours. The override options below allow for faster removal of deleted velero backups from the kopia repo. +- normalGC: 24 hours +- fastGC: 12 hours +- eagerGC: 6 hours + +Per kopia [Maintenance Safety](https://kopia.io/docs/advanced/maintenance/#maintenance-safety), it is expected that velero backup deletion will not result in immediate kopia repository data removal. Reducing the full maintenance interval using the above options should help reduce the time taken to remove blobs not in use. + +On the other hand, the not-in-use data will be deleted permanently after the full maintenance, so shorter full maintenance intervals may weaken data safety if they are used incorrectly. [1]: file-system-backup.md -[2]: csi-snapshot-data-movement.md \ No newline at end of file +[2]: csi-snapshot-data-movement.md diff --git a/site/content/docs/main/repository-maintenance.md b/site/content/docs/main/repository-maintenance.md index 8c712a9d7c..7a0bb53c64 100644 --- a/site/content/docs/main/repository-maintenance.md +++ b/site/content/docs/main/repository-maintenance.md @@ -51,7 +51,7 @@ For example, the following BackupRepository's key should be `test-default-kopia` You can still customize the maintenance job resource requests and limit when using the [velero install][1] CLI command. -The `LoadAffinity` structure is reused from design [node-agent affinity configuration](2). 
+The `LoadAffinity` structure is reused from design [node-agent affinity configuration][2]. ### Affinity Example It's possible that the users want to choose nodes that match condition A or condition B to run the job. @@ -130,8 +130,12 @@ velero install --default-repo-maintain-frequency ``` For Kopia the default maintenance frequency is 1 hour, and Restic is 7 * 24 hours. +### Full Maintenance Interval customization +See the `fullMaintenanceInterval` option in [backup repository configuration][3]. + ### Others Maintenance jobs will inherit the labels, annotations, toleration, nodeSelector, service account, image, environment variables, cloud-credentials etc. from Velero deployment. [1]: velero-install.md#usage -[2]: node-agent-concurrency.md \ No newline at end of file +[2]: node-agent-concurrency.md +[3]: backup-repository-configuration.md