Skip to content

Commit df2658d

Browse files
authored
More test determinism fixes (#13)
Improve test logic to help find the root cause of flakes
1 parent 2eb6bcc commit df2658d

File tree

3 files changed

+82
-30
lines changed

3 files changed

+82
-30
lines changed

internal/controller/questdbsnapshotschedule_controller.go

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ type QuestDBSnapshotScheduleReconciler struct {
5656
func (r *QuestDBSnapshotScheduleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
5757
var (
5858
err error
59-
latestSnap *crdv1beta1.QuestDBSnapshot
59+
latestSnap = &crdv1beta1.QuestDBSnapshot{}
6060

6161
sched = &crdv1beta1.QuestDBSnapshotSchedule{}
6262
childSnapshots = &crdv1beta1.QuestDBSnapshotList{}
@@ -77,24 +77,20 @@ func (r *QuestDBSnapshotScheduleReconciler) Reconcile(ctx context.Context, req c
7777
}
7878

7979
// Get child snapshots
80-
if childSnapshots, err = r.getChildSnapshots(ctx, sched); err != nil {
80+
if childSnapshots, err = r.getOrderedChildSnapshots(ctx, sched); err != nil {
8181
return ctrl.Result{}, err
8282
}
8383

8484
// Since snapshots are sorted in descending order, the latest snapshot is the first item
8585
if len(childSnapshots.Items) > 0 {
86-
if len(childSnapshots.Items) > 0 {
87-
latestSnap = &childSnapshots.Items[0]
88-
}
86+
latestSnap = &childSnapshots.Items[0]
8987
}
9088

9189
// Update the snapshot phase status based on the latest snapshot
92-
if latestSnap != nil {
93-
if latestSnap.Status.Phase != sched.Status.SnapshotPhase {
94-
sched.Status.SnapshotPhase = latestSnap.Status.Phase
95-
if err = r.Status().Update(ctx, sched); err != nil {
96-
return ctrl.Result{}, err
97-
}
90+
if latestSnap.Status.Phase != sched.Status.SnapshotPhase {
91+
sched.Status.SnapshotPhase = latestSnap.Status.Phase
92+
if err = r.Status().Update(ctx, sched); err != nil {
93+
return ctrl.Result{}, err
9894
}
9995
}
10096

@@ -113,7 +109,7 @@ func (r *QuestDBSnapshotScheduleReconciler) Reconcile(ctx context.Context, req c
113109
}
114110

115111
// Skip taking a snapshot if the latest snapshot is not complete (succeeded or failed or empty phase)
116-
if latestSnap != nil && !latestSnap.IsComplete() {
112+
if sched.Status.SnapshotPhase != "" && !latestSnap.IsComplete() {
117113
r.Recorder.Event(sched, "Warning", "SnapshotSkipped", fmt.Sprintf("Skipping snapshot because the latest snapshot is not complete: %s", latestSnap.Name))
118114
} else {
119115
// Otherwise, build the snapshot
@@ -170,7 +166,7 @@ func (r *QuestDBSnapshotScheduleReconciler) buildSnapshot(sched *crdv1beta1.Ques
170166

171167
}
172168

173-
func (r *QuestDBSnapshotScheduleReconciler) getChildSnapshots(ctx context.Context, sched *crdv1beta1.QuestDBSnapshotSchedule) (*crdv1beta1.QuestDBSnapshotList, error) {
169+
func (r *QuestDBSnapshotScheduleReconciler) getOrderedChildSnapshots(ctx context.Context, sched *crdv1beta1.QuestDBSnapshotSchedule) (*crdv1beta1.QuestDBSnapshotList, error) {
174170
var (
175171
err error
176172

internal/controller/questdbsnapshotschedule_controller_test.go

Lines changed: 71 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package controller
22

33
import (
4+
"context"
5+
"fmt"
46
"time"
57

68
. "github.com/onsi/ginkgo/v2"
@@ -15,12 +17,15 @@ import (
1517
"k8s.io/utils/pointer"
1618
ctrl "sigs.k8s.io/controller-runtime"
1719
"sigs.k8s.io/controller-runtime/pkg/client"
20+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
1821
)
1922

2023
var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
2124
var (
2225
timeout = time.Second * 2
2326
interval = time.Millisecond * 100
27+
28+
recorder = record.NewFakeRecorder(100)
2429
)
2530

2631
Context("golden path case", Ordered, func() {
@@ -37,7 +42,7 @@ var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
3742
r = &QuestDBSnapshotScheduleReconciler{
3843
Client: k8sClient,
3944
Scheme: scheme.Scheme,
40-
Recorder: record.NewFakeRecorder(100),
45+
Recorder: recorder,
4146
TimeSource: abtime.NewManual(),
4247
}
4348

@@ -68,7 +73,7 @@ var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
6873
timeSource = r.TimeSource.(*abtime.ManualTime)
6974

7075
By("Reconciling the QuestDBSnapshotSchedule")
71-
res, err := r.Reconcile(ctx, ctrl.Request{
76+
res, err := reconcileSnapshotSchedules(ctx, r, ctrl.Request{
7277
NamespacedName: client.ObjectKeyFromObject(sched),
7378
})
7479
Expect(err).ToNot(HaveOccurred())
@@ -83,7 +88,7 @@ var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
8388
advanceToTheNextMinute(timeSource)
8489

8590
By("Forcing a reconcile")
86-
_, err := r.Reconcile(ctx, ctrl.Request{
91+
_, err := reconcileSnapshotSchedules(ctx, r, ctrl.Request{
8792
NamespacedName: client.ObjectKeyFromObject(sched),
8893
})
8994
Expect(err).ToNot(HaveOccurred())
@@ -117,10 +122,10 @@ var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
117122
}, timeout, interval).Should(Succeed())
118123

119124
By("Advancing time a few milliseconds")
120-
timeSource.Advance(time.Millisecond * 2)
125+
advanceTime(timeSource, time.Millisecond*2)
121126

122127
By("Forcing a reconcile")
123-
_, err := r.Reconcile(ctx, ctrl.Request{
128+
_, err := reconcileSnapshotSchedules(ctx, r, ctrl.Request{
124129
NamespacedName: client.ObjectKeyFromObject(sched),
125130
})
126131
Expect(err).ToNot(HaveOccurred())
@@ -135,7 +140,7 @@ var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
135140
advanceToTheNextMinute(timeSource)
136141

137142
By("Forcing a reconcile")
138-
_, err := r.Reconcile(ctx, ctrl.Request{
143+
_, err := reconcileSnapshotSchedules(ctx, r, ctrl.Request{
139144
NamespacedName: client.ObjectKeyFromObject(sched),
140145
})
141146
Expect(err).ToNot(HaveOccurred())
@@ -177,10 +182,10 @@ var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
177182
Expect(k8sClient.Status().Update(ctx, latestSnap)).To(Succeed())
178183

179184
By("Advancing time a few milliseconds to avoid creating a new snapshot")
180-
timeSource.Advance(5 * time.Millisecond)
185+
advanceTime(timeSource, 5*time.Millisecond)
181186

182187
By("Forcing a reconcile")
183-
_, err := r.Reconcile(ctx, ctrl.Request{
188+
_, err := reconcileSnapshotSchedules(ctx, r, ctrl.Request{
184189
NamespacedName: client.ObjectKeyFromObject(sched),
185190
})
186191
Expect(err).ToNot(HaveOccurred())
@@ -204,7 +209,7 @@ var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
204209
r = &QuestDBSnapshotScheduleReconciler{
205210
Client: k8sClient,
206211
Scheme: scheme.Scheme,
207-
Recorder: record.NewFakeRecorder(100),
212+
Recorder: recorder,
208213
TimeSource: abtime.NewManual(),
209214
}
210215

@@ -240,10 +245,10 @@ var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
240245
Expect(k8sClient.Create(ctx, snap)).To(Succeed())
241246

242247
By("Advancing time at least a minute to trigger a reconcile")
243-
r.TimeSource.(*abtime.ManualTime).Advance(time.Minute + 5*time.Second)
248+
advanceTime(r.TimeSource.(*abtime.ManualTime), time.Minute+5*time.Second)
244249

245250
By("Forcing a reconcile")
246-
_, err := r.Reconcile(ctx, ctrl.Request{
251+
_, err := reconcileSnapshotSchedules(ctx, r, ctrl.Request{
247252
NamespacedName: client.ObjectKeyFromObject(sched),
248253
})
249254
Expect(err).ToNot(HaveOccurred())
@@ -272,7 +277,7 @@ var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
272277
r = &QuestDBSnapshotScheduleReconciler{
273278
Client: k8sClient,
274279
Scheme: scheme.Scheme,
275-
Recorder: record.NewFakeRecorder(100),
280+
Recorder: recorder,
276281
TimeSource: abtime.NewManual(),
277282
}
278283

@@ -304,8 +309,8 @@ var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
304309
By("Advancing time by minute enough to create retention * 2 snapshots")
305310
for i := int32(0); i < retention*2; i++ {
306311

307-
timeSource.Advance(time.Minute)
308-
_, err := r.Reconcile(ctx, ctrl.Request{
312+
advanceTime(timeSource, time.Minute)
313+
_, err := reconcileSnapshotSchedules(ctx, r, ctrl.Request{
309314
NamespacedName: client.ObjectKeyFromObject(sched),
310315
})
311316
Expect(err).ToNot(HaveOccurred())
@@ -325,6 +330,8 @@ var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
325330
}
326331

327332
By("Getting all snapshots and ensuring that they are all failed")
333+
// We need to migrate snapshots from Pending to either Succeeded or Failed for the reconciler
334+
// to create a new snapshot. Since we want to test retention of failed
328335
snapList := &crdv1beta1.QuestDBSnapshotList{}
329336
Eventually(func(g Gomega) {
330337
g.Expect(k8sClient.List(ctx, snapList, client.InNamespace(sched.Namespace))).Should(Succeed())
@@ -347,24 +354,71 @@ var _ = Describe("QuestDBSnapshotSchedule Controller", func() {
347354
}
348355

349356
By("Advancing time a small amount to not trigger another snapshot creation")
350-
r.TimeSource.(*abtime.ManualTime).Advance(5 * time.Millisecond)
357+
advanceTime(r.TimeSource.(*abtime.ManualTime), 5*time.Millisecond)
351358

352359
By("Forcing a reconcile")
353-
_, err := r.Reconcile(ctx, ctrl.Request{
360+
_, err := reconcileSnapshotSchedules(ctx, r, ctrl.Request{
354361
NamespacedName: client.ObjectKeyFromObject(sched),
355362
})
356363
Expect(err).ToNot(HaveOccurred())
357364

358365
By("Checking that the list has shrunk by 1")
359366
Expect(k8sClient.List(ctx, snapList, client.InNamespace(sched.Namespace))).Should(Succeed())
367+
succeeded := []crdv1beta1.QuestDBSnapshot{}
368+
failed := []crdv1beta1.QuestDBSnapshot{}
369+
for _, snap := range snapList.Items {
370+
switch snap.Status.Phase {
371+
case crdv1beta1.SnapshotSucceeded:
372+
succeeded = append(succeeded, snap)
373+
case crdv1beta1.SnapshotFailed:
374+
failed = append(failed, snap)
375+
default:
376+
Expect(false).To(Equal(true), fmt.Sprintf("no snapshot should have phase %q", snap.Status.Phase))
377+
}
378+
}
379+
Expect(succeeded).Should(HaveLen(int(retention)))
380+
Expect(failed).Should(HaveLen(int(retention - 1)))
360381
Expect(snapList.Items).To(HaveLen(int(retention*2 - 1)))
361382

362383
})
363384

364385
})
365386

366387
func advanceToTheNextMinute(timeSource *abtime.ManualTime) {
367-
nextMinute := timeSource.Now().Add(time.Minute).Truncate(time.Minute)
388+
now := timeSource.Now()
389+
nextMinute := now.Add(time.Minute).Truncate(time.Minute)
368390
timeToNextMinute := nextMinute.Sub(timeSource.Now())
369391
timeSource.Advance(timeToNextMinute)
392+
testDebugLog.Info("Advanced Time To Next Minute",
393+
"oldTime", now.Format(time.RFC3339Nano),
394+
"nextMinute", nextMinute.Format(time.RFC3339Nano),
395+
"newTime", timeSource.Now().Format(time.RFC3339Nano),
396+
)
397+
}
398+
399+
func advanceTime(timeSource *abtime.ManualTime, d time.Duration) {
400+
now := timeSource.Now()
401+
timeSource.Advance(d)
402+
testDebugLog.Info("Advanced Time",
403+
"oldTime", now.Format(time.RFC3339Nano),
404+
"duration", d.String(),
405+
"newTime", timeSource.Now().Format(time.RFC3339Nano),
406+
)
407+
}
408+
409+
func reconcileSnapshotSchedules(ctx context.Context, r *QuestDBSnapshotScheduleReconciler, req reconcile.Request) (reconcile.Result, error) {
410+
testDebugLog.Info("Reconciling", "resource", req.String())
411+
res, err := r.Reconcile(ctx, req)
412+
413+
// Flush the event buffer synchronously so we can accurately determine when things are happening
414+
var flushed bool
415+
for !flushed {
416+
select {
417+
case e := <-r.Recorder.(*record.FakeRecorder).Events:
418+
testDebugLog.Info(e, "Reconciler", "QuestDBSnapshotSchedule")
419+
default:
420+
flushed = true
421+
}
422+
}
423+
return res, err
370424
}

internal/controller/suite_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ var (
5252

5353
ctx context.Context
5454
cancel context.CancelFunc
55+
56+
testDebugLog = ctrl.Log.WithName("test-debug")
5557
)
5658

5759
func TestAPIs(t *testing.T) {

0 commit comments

Comments
 (0)