Skip to content

Commit 03d4d9f

Browse files
authored
fix(watcher): add debouncing to kubeconfig watcher for Windows compatibility (containers#545)
The kubeconfig watcher was triggering multiple callbacks on Windows due to fsnotify generating multiple events for a single file write operation. This caused race conditions during provider reset, leading to flaky tests. Changes: - Add debouncing to Kubeconfig watcher (default 100ms, configurable via KUBECONFIG_DEBOUNCE_WINDOW_MS environment variable) - Add proper close synchronization with stopCh/stoppedCh channels - Align Kubeconfig watcher implementation with ClusterState watcher pattern Signed-off-by: Marc Nuri <[email protected]>
1 parent a356eb8 commit 03d4d9f

File tree

3 files changed

+231
-41
lines changed

3 files changed

+231
-41
lines changed

pkg/kubernetes/provider_watch_test.go

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ func (s *ProviderWatchTargetsTestSuite) SetupTest() {
2929

3030
s.T().Setenv("CLUSTER_STATE_POLL_INTERVAL_MS", "100")
3131
s.T().Setenv("CLUSTER_STATE_DEBOUNCE_WINDOW_MS", "50")
32+
s.T().Setenv("KUBECONFIG_DEBOUNCE_WINDOW_MS", "50")
3233

3334
// Add multiple fake contexts to allow testing of context changes
3435
s.kubeconfig = s.mockServer.Kubeconfig()
@@ -103,16 +104,11 @@ func (s *ProviderWatchTargetsTestSuite) TestKubeConfigClusterProvider() {
103104
})
104105

105106
s.Run("Keeps watching for further changes", func() {
106-
kubeconfig := *s.kubeconfig
107-
kubeconfig.CurrentContext = "context-2"
108-
s.Require().NoError(clientcmd.WriteToFile(kubeconfig, s.staticConfig.KubeConfig))
107+
s.kubeconfig.CurrentContext = "context-2"
108+
s.Require().NoError(clientcmd.WriteToFile(*s.kubeconfig, s.staticConfig.KubeConfig))
109109
s.Require().NoError(waitForCallback(5 * time.Second))
110110

111111
s.Run("Replaces default target with new context", func() {
112-
// In Windows systems, fsnotify may trigger multiple events for a single file change,
113-
s.Require().NoError(test.WaitForCondition(5*time.Second, func() bool {
114-
return provider.GetDefaultTarget() == "context-2"
115-
}))
116112
s.Equal("context-2", provider.GetDefaultTarget(), "Expected default target context to be updated")
117113
})
118114
})
Lines changed: 92 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,64 @@
11
package watcher
22

33
import (
4+
"os"
5+
"strconv"
6+
"sync"
7+
"time"
8+
49
"github.com/fsnotify/fsnotify"
510
"k8s.io/client-go/tools/clientcmd"
11+
"k8s.io/klog/v2"
12+
)
13+
14+
const (
15+
// DefaultKubeconfigDebounceWindow is the default debounce window for kubeconfig file changes
16+
DefaultKubeconfigDebounceWindow = 100 * time.Millisecond
617
)
718

819
type Kubeconfig struct {
920
clientcmd.ClientConfig
10-
close func()
21+
debounceWindow time.Duration
22+
debounceTimer *time.Timer
23+
mu sync.Mutex
24+
stopCh chan struct{}
25+
stoppedCh chan struct{}
26+
started bool
1127
}
1228

1329
var _ Watcher = (*Kubeconfig)(nil)
1430

1531
func NewKubeconfig(clientConfig clientcmd.ClientConfig) *Kubeconfig {
32+
debounceWindow := DefaultKubeconfigDebounceWindow
33+
34+
// Allow override via environment variable for testing
35+
if envDebounce := os.Getenv("KUBECONFIG_DEBOUNCE_WINDOW_MS"); envDebounce != "" {
36+
if ms, err := strconv.Atoi(envDebounce); err == nil && ms > 0 {
37+
debounceWindow = time.Duration(ms) * time.Millisecond
38+
klog.V(2).Infof("Using custom kubeconfig debounce window: %v", debounceWindow)
39+
}
40+
}
41+
1642
return &Kubeconfig{
17-
ClientConfig: clientConfig,
43+
ClientConfig: clientConfig,
44+
debounceWindow: debounceWindow,
45+
stopCh: make(chan struct{}),
46+
stoppedCh: make(chan struct{}),
1847
}
1948
}
2049

50+
// Watch starts a background watcher that monitors kubeconfig file changes
51+
// and triggers a debounced reload when changes are detected.
52+
// It can only be called once per Kubeconfig instance.
2153
func (w *Kubeconfig) Watch(onChange func() error) {
54+
w.mu.Lock()
55+
if w.started {
56+
w.mu.Unlock()
57+
return
58+
}
59+
w.started = true
60+
w.mu.Unlock()
61+
2262
kubeConfigFiles := w.ConfigAccess().GetLoadingPrecedence()
2363
if len(kubeConfigFiles) == 0 {
2464
return
@@ -30,29 +70,72 @@ func (w *Kubeconfig) Watch(onChange func() error) {
3070
for _, file := range kubeConfigFiles {
3171
_ = watcher.Add(file)
3272
}
73+
3374
go func() {
75+
defer close(w.stoppedCh)
76+
defer func() { _ = watcher.Close() }()
77+
78+
klog.V(2).Infof("Started kubeconfig watcher (debounce: %v)", w.debounceWindow)
79+
3480
for {
3581
select {
82+
case <-w.stopCh:
83+
klog.V(2).Info("Stopping kubeconfig watcher")
84+
return
3685
case _, ok := <-watcher.Events:
3786
if !ok {
3887
return
3988
}
40-
_ = onChange()
89+
w.mu.Lock()
90+
klog.V(3).Info("Kubeconfig file change detected, scheduling debounced reload")
91+
if w.debounceTimer != nil {
92+
w.debounceTimer.Stop()
93+
}
94+
w.debounceTimer = time.AfterFunc(w.debounceWindow, func() {
95+
klog.V(2).Info("Kubeconfig debounce window expired, triggering reload")
96+
if err := onChange(); err != nil {
97+
klog.Errorf("Failed to reload after kubeconfig change: %v", err)
98+
}
99+
})
100+
w.mu.Unlock()
41101
case _, ok := <-watcher.Errors:
42102
if !ok {
43103
return
44104
}
45105
}
46106
}
47107
}()
48-
if w.close != nil {
49-
w.close()
50-
}
51-
w.close = func() { _ = watcher.Close() }
52108
}
53109

110+
// Close stops the kubeconfig watcher
54111
func (w *Kubeconfig) Close() {
55-
if w.close != nil {
56-
w.close()
112+
w.mu.Lock()
113+
defer w.mu.Unlock()
114+
115+
if w.debounceTimer != nil {
116+
w.debounceTimer.Stop()
117+
}
118+
119+
if w.stopCh == nil || w.stoppedCh == nil {
120+
return // Already closed
121+
}
122+
123+
if !w.started {
124+
return
125+
}
126+
127+
select {
128+
case <-w.stopCh:
129+
// Already closed or stopped
130+
return
131+
default:
132+
close(w.stopCh)
133+
w.mu.Unlock()
134+
<-w.stoppedCh
135+
w.mu.Lock()
136+
w.started = false
137+
// Recreate channels for potential restart
138+
w.stopCh = make(chan struct{})
139+
w.stoppedCh = make(chan struct{})
57140
}
58141
}

0 commit comments

Comments
 (0)