Skip to content

Commit bbe528d

Browse files
committed
Update Scheduled Backup CronJob Name and Reconcile by Labels
This commit updates the scheduled backup CronJob name format, previously '<cluster name>-pgbackrest-repo#-<upgrade type>', to a new shorter format, '<cluster name>-repo#-<upgrade type>', to better support longer PostgresCluster names. Also, to facilitate moving from older PGO versions using the previous format, the scheduled backup CronJobs are now reconciled by Label to ensure any existing CronJobs can continue to be used by the cluster. For more information on name limitations, see: https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/ Issue: [sc-14068]
1 parent 4d8990a commit bbe528d

File tree

4 files changed

+98
-20
lines changed

4 files changed

+98
-20
lines changed

docs/content/tutorial/disaster-recovery.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ NAME READY AGE
315315
statefulset.apps/hippo-00-lwgx 0/0 1h
316316
317317
NAME SCHEDULE SUSPEND ACTIVE
318-
cronjob.batch/hippo-pgbackrest-repo1-full @daily True 0
318+
cronjob.batch/hippo-repo1-full @daily True 0
319319
```
320320

321321
We can then promote the standby cluster by removing or disabling its

internal/controller/postgrescluster/pgbackrest.go

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1332,7 +1332,7 @@ func (r *Reconciler) reconcilePGBackRest(ctx context.Context,
13321332
result = updateReconcileResult(result, reconcile.Result{RequeueAfter: 10 * time.Second})
13331333
}
13341334
// reconcile the pgBackRest backup CronJobs
1335-
requeue := r.reconcileScheduledBackups(ctx, postgresCluster, sa)
1335+
requeue := r.reconcileScheduledBackups(ctx, postgresCluster, sa, repoResources.cronjobs)
13361336
// If the pgBackRest backup CronJob reconciliation function has encountered an error, requeue
13371337
// after 10 seconds. The error will not bubble up to allow the reconcile loop to continue.
13381338
// An error is not logged because an event was already created.
@@ -2748,6 +2748,7 @@ func getRepoVolumeStatus(repoStatus []v1beta1.RepoStatus, repoVolumes []*corev1.
27482748
// schedules configured in the cluster definition
27492749
func (r *Reconciler) reconcileScheduledBackups(
27502750
ctx context.Context, cluster *v1beta1.PostgresCluster, sa *corev1.ServiceAccount,
2751+
cronjobs []*batchv1beta1.CronJob,
27512752
) bool {
27522753
log := logging.FromContext(ctx).WithValues("reconcileResource", "repoCronJob")
27532754
// requeue if there is an error during creation
@@ -2760,21 +2761,21 @@ func (r *Reconciler) reconcileScheduledBackups(
27602761
// next if the repo level schedule is not nil, create the CronJob.
27612762
if repo.BackupSchedules.Full != nil {
27622763
if err := r.reconcilePGBackRestCronJob(ctx, cluster, repo,
2763-
full, repo.BackupSchedules.Full, sa); err != nil {
2764+
full, repo.BackupSchedules.Full, sa, cronjobs); err != nil {
27642765
log.Error(err, "unable to reconcile Full backup for "+repo.Name)
27652766
requeue = true
27662767
}
27672768
}
27682769
if repo.BackupSchedules.Differential != nil {
27692770
if err := r.reconcilePGBackRestCronJob(ctx, cluster, repo,
2770-
differential, repo.BackupSchedules.Differential, sa); err != nil {
2771+
differential, repo.BackupSchedules.Differential, sa, cronjobs); err != nil {
27712772
log.Error(err, "unable to reconcile Differential backup for "+repo.Name)
27722773
requeue = true
27732774
}
27742775
}
27752776
if repo.BackupSchedules.Incremental != nil {
27762777
if err := r.reconcilePGBackRestCronJob(ctx, cluster, repo,
2777-
incremental, repo.BackupSchedules.Incremental, sa); err != nil {
2778+
incremental, repo.BackupSchedules.Incremental, sa, cronjobs); err != nil {
27782779
log.Error(err, "unable to reconcile Incremental backup for "+repo.Name)
27792780
requeue = true
27802781
}
@@ -2791,6 +2792,7 @@ func (r *Reconciler) reconcileScheduledBackups(
27912792
func (r *Reconciler) reconcilePGBackRestCronJob(
27922793
ctx context.Context, cluster *v1beta1.PostgresCluster, repo v1beta1.PGBackRestRepo,
27932794
backupType string, schedule *string, serviceAccount *corev1.ServiceAccount,
2795+
cronjobs []*batchv1beta1.CronJob,
27942796
) error {
27952797

27962798
log := logging.FromContext(ctx).WithValues("reconcileResource", "repoCronJob")
@@ -2804,6 +2806,25 @@ func (r *Reconciler) reconcilePGBackRestCronJob(
28042806
naming.PGBackRestCronJobLabels(cluster.Name, repo.Name, backupType),
28052807
)
28062808
objectmeta := naming.PGBackRestCronJob(cluster, backupType, repo.Name)
2809+
2810+
// Look for an existing CronJob by the associated Labels. If one exists,
2811+
// update the ObjectMeta accordingly.
2812+
for _, cronjob := range cronjobs {
2813+
// ignore CronJobs that are terminating
2814+
if cronjob.GetDeletionTimestamp() != nil {
2815+
continue
2816+
}
2817+
2818+
if cronjob.GetLabels()[naming.LabelCluster] == cluster.Name &&
2819+
cronjob.GetLabels()[naming.LabelPGBackRestCronJob] == backupType &&
2820+
cronjob.GetLabels()[naming.LabelPGBackRestRepo] == repo.Name {
2821+
objectmeta = metav1.ObjectMeta{
2822+
Namespace: cluster.GetNamespace(),
2823+
Name: cronjob.Name,
2824+
}
2825+
}
2826+
}
2827+
28072828
objectmeta.Labels = labels
28082829
objectmeta.Annotations = annotations
28092830

internal/controller/postgrescluster/pgbackrest_test.go

Lines changed: 71 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,14 @@ func fakePostgresCluster(clusterName, namespace, clusterUID string,
173173
return postgresCluster
174174
}
175175

176+
func fakeObservedCronJobs() []*batchv1beta1.CronJob {
177+
return []*batchv1beta1.CronJob{
178+
{
179+
ObjectMeta: metav1.ObjectMeta{
180+
Name: "fake-cronjob",
181+
}}}
182+
}
183+
176184
func TestReconcilePGBackRest(t *testing.T) {
177185
// Garbage collector cleans up test resources before the test completes
178186
if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") {
@@ -507,19 +515,19 @@ topologySpreadConstraints:
507515
Type: condition, Reason: "testing", Status: status})
508516
}
509517

510-
requeue := r.reconcileScheduledBackups(ctx, postgresCluster, serviceAccount)
518+
requeue := r.reconcileScheduledBackups(ctx, postgresCluster, serviceAccount, fakeObservedCronJobs())
511519
assert.Assert(t, !requeue)
512520

513521
returnedCronJob := &batchv1beta1.CronJob{}
514522
if err := tClient.Get(ctx, types.NamespacedName{
515-
Name: postgresCluster.Name + "-pgbackrest-repo1-full",
523+
Name: postgresCluster.Name + "-repo1-full",
516524
Namespace: postgresCluster.GetNamespace(),
517525
}, returnedCronJob); err != nil {
518526
assert.NilError(t, err)
519527
}
520528

521529
// check returned cronjob matches set spec
522-
assert.Equal(t, returnedCronJob.Name, "hippocluster-pgbackrest-repo1-full")
530+
assert.Equal(t, returnedCronJob.Name, "hippocluster-repo1-full")
523531
assert.Equal(t, returnedCronJob.Spec.Schedule, testCronSchedule)
524532
assert.Equal(t, returnedCronJob.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Name,
525533
"pgbackrest")
@@ -558,7 +566,7 @@ topologySpreadConstraints:
558566

559567
returnedCronJob := &batchv1beta1.CronJob{}
560568
if err := tClient.Get(ctx, types.NamespacedName{
561-
Name: postgresCluster.Name + "-pgbackrest-repo1-full",
569+
Name: postgresCluster.Name + "-repo1-full",
562570
Namespace: postgresCluster.GetNamespace(),
563571
}, returnedCronJob); err != nil {
564572
assert.NilError(t, err)
@@ -573,11 +581,11 @@ topologySpreadConstraints:
573581
postgresCluster.Spec.Standby = nil
574582

575583
requeue := r.reconcileScheduledBackups(ctx,
576-
postgresCluster, serviceAccount)
584+
postgresCluster, serviceAccount, fakeObservedCronJobs())
577585
assert.Assert(t, !requeue)
578586

579587
assert.NilError(t, tClient.Get(ctx, types.NamespacedName{
580-
Name: postgresCluster.Name + "-pgbackrest-repo1-full",
588+
Name: postgresCluster.Name + "-repo1-full",
581589
Namespace: postgresCluster.GetNamespace(),
582590
}, returnedCronJob))
583591

@@ -591,11 +599,11 @@ topologySpreadConstraints:
591599
}
592600

593601
requeue := r.reconcileScheduledBackups(ctx,
594-
postgresCluster, serviceAccount)
602+
postgresCluster, serviceAccount, fakeObservedCronJobs())
595603
assert.Assert(t, !requeue)
596604

597605
assert.NilError(t, tClient.Get(ctx, types.NamespacedName{
598-
Name: postgresCluster.Name + "-pgbackrest-repo1-full",
606+
Name: postgresCluster.Name + "-repo1-full",
599607
Namespace: postgresCluster.GetNamespace(),
600608
}, returnedCronJob))
601609

@@ -3327,6 +3335,8 @@ func TestReconcileScheduledBackups(t *testing.T) {
33273335
expectedEventReason string
33283336
// the observed instances
33293337
instances *observedInstances
3338+
// CronJobs exist
3339+
cronJobs bool
33303340
}{
33313341
{
33323342
testDesc: "should reconcile, no requeue",
@@ -3341,6 +3351,20 @@ func TestReconcileScheduledBackups(t *testing.T) {
33413351
},
33423352
expectReconcile: true,
33433353
expectRequeue: false,
3354+
}, {
3355+
testDesc: "should reconcile, no requeue, existing cronjob",
3356+
clusterConditions: map[string]metav1.ConditionStatus{
3357+
ConditionRepoHostReady: metav1.ConditionTrue,
3358+
ConditionReplicaCreate: metav1.ConditionTrue,
3359+
},
3360+
status: &v1beta1.PostgresClusterStatus{
3361+
Patroni: v1beta1.PatroniStatus{SystemIdentifier: "12345abcde"},
3362+
PGBackRest: &v1beta1.PGBackRestStatus{
3363+
Repos: []v1beta1.RepoStatus{{Name: "repo1", StanzaCreated: true}}},
3364+
},
3365+
expectReconcile: true,
3366+
expectRequeue: false,
3367+
cronJobs: true,
33443368
}, {
33453369
testDesc: "cluster not bootstrapped, should not reconcile",
33463370
status: &v1beta1.PostgresClusterStatus{
@@ -3432,12 +3456,38 @@ func TestReconcileScheduledBackups(t *testing.T) {
34323456
assert.NilError(t, tClient.Status().Update(ctx, postgresCluster))
34333457

34343458
var requeue bool
3435-
if tc.instances != nil {
3436-
requeue = r.reconcileScheduledBackups(ctx, postgresCluster, sa)
3459+
if tc.cronJobs {
3460+
existingCronJobs := []*batchv1beta1.CronJob{
3461+
{
3462+
ObjectMeta: metav1.ObjectMeta{
3463+
Name: "existingcronjob-repo1-full",
3464+
Labels: map[string]string{
3465+
naming.LabelCluster: clusterName,
3466+
naming.LabelPGBackRestCronJob: "full",
3467+
naming.LabelPGBackRestRepo: "repo1",
3468+
}},
3469+
}, {
3470+
ObjectMeta: metav1.ObjectMeta{
3471+
Name: "existingcronjob-repo1-incr",
3472+
Labels: map[string]string{
3473+
naming.LabelCluster: clusterName,
3474+
naming.LabelPGBackRestCronJob: "incr",
3475+
naming.LabelPGBackRestRepo: "repo1",
3476+
}},
3477+
}, {
3478+
ObjectMeta: metav1.ObjectMeta{
3479+
Name: "existingcronjob-repo1-diff",
3480+
Labels: map[string]string{
3481+
naming.LabelCluster: clusterName,
3482+
naming.LabelPGBackRestCronJob: "diff",
3483+
naming.LabelPGBackRestRepo: "repo1",
3484+
}},
3485+
},
3486+
}
3487+
requeue = r.reconcileScheduledBackups(ctx, postgresCluster, sa, existingCronJobs)
34373488
} else {
3438-
requeue = r.reconcileScheduledBackups(ctx, postgresCluster, sa)
3489+
requeue = r.reconcileScheduledBackups(ctx, postgresCluster, sa, fakeObservedCronJobs())
34393490
}
3440-
34413491
if !tc.expectReconcile && !tc.expectRequeue {
34423492
// expect no reconcile, no requeue
34433493
assert.Assert(t, !requeue)
@@ -3475,16 +3525,23 @@ func TestReconcileScheduledBackups(t *testing.T) {
34753525

34763526
for _, backupType := range backupTypes {
34773527

3528+
var cronJobName string
3529+
if tc.cronJobs {
3530+
cronJobName = "existingcronjob-repo1-" + backupType
3531+
} else {
3532+
cronJobName = postgresCluster.Name + "-repo1-" + backupType
3533+
}
3534+
34783535
returnedCronJob := &batchv1beta1.CronJob{}
34793536
if err := tClient.Get(ctx, types.NamespacedName{
3480-
Name: postgresCluster.Name + "-pgbackrest-repo1-" + backupType,
3537+
Name: cronJobName,
34813538
Namespace: postgresCluster.GetNamespace(),
34823539
}, returnedCronJob); err != nil {
34833540
assert.NilError(t, err)
34843541
}
34853542

34863543
// check returned cronjob matches set spec
3487-
assert.Equal(t, returnedCronJob.Name, clusterName+"-pgbackrest-repo1-"+backupType)
3544+
assert.Equal(t, returnedCronJob.Name, cronJobName)
34883545
assert.Equal(t, returnedCronJob.Spec.Schedule, testCronSchedule)
34893546
assert.Equal(t, returnedCronJob.Spec.JobTemplate.Spec.Template.Spec.PriorityClassName, "some-priority-class")
34903547
assert.Equal(t, returnedCronJob.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Name,

internal/naming/names.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ func PGBackRestBackupJob(cluster *v1beta1.PostgresCluster) metav1.ObjectMeta {
418418
func PGBackRestCronJob(cluster *v1beta1.PostgresCluster, backuptype, repoName string) metav1.ObjectMeta {
419419
return metav1.ObjectMeta{
420420
Namespace: cluster.GetNamespace(),
421-
Name: cluster.Name + "-pgbackrest-" + repoName + "-" + backuptype,
421+
Name: cluster.Name + "-" + repoName + "-" + backuptype,
422422
}
423423
}
424424

0 commit comments

Comments
 (0)