Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: implement workaround to scale Grove PodGangSet
  • Loading branch information
julienmancuso committed Aug 19, 2025
commit e130afc3f60194590a920e4e902c23f7f60e189d
45 changes: 29 additions & 16 deletions deploy/cloud/operator/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/client-go/discovery/cached/memory"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
_ "k8s.io/client-go/plugin/pkg/client/auth"
Expand Down Expand Up @@ -68,6 +69,32 @@ var (
setupLog = ctrl.Log.WithName("setup")
)

func createScalesGetter(mgr ctrl.Manager) (scale.ScalesGetter, error) {
config := mgr.GetConfig()

// Create kubernetes client for discovery
kubeClient, err := kubernetes.NewForConfig(config)
if err != nil {
return nil, err
}

// Create cached discovery client
cachedDiscovery := memory.NewMemCacheClient(kubeClient.Discovery())

// Create REST mapper
restMapper := restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscovery)

// Use scale.New with the kubernetes client's REST client
scalesGetter := scale.New(
kubeClient.CoreV1().RESTClient(),
restMapper,
dynamic.LegacyAPIPathResolverFunc,
scale.NewDiscoveryScaleKindResolver(cachedDiscovery),
)

return scalesGetter, nil
}

func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))

Expand Down Expand Up @@ -325,21 +352,7 @@ func main() {
os.Exit(1)
}
// Create scale client for Grove resource scaling
config := mgr.GetConfig()
kubeClient, err := kubernetes.NewForConfig(config)
if err != nil {
setupLog.Error(err, "unable to create kubernetes client for scale operations")
os.Exit(1)
}

// Create cached discovery client
cachedDiscovery := memory.NewMemCacheClient(kubeClient.Discovery())

// Create REST mapper for discovery
restMapper := restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscovery)

// Create scale client with proper parameters
scalesGetter, err := scale.NewForConfig(config, restMapper, nil, scale.NewDiscoveryScaleKindResolver(cachedDiscovery))
scaleClient, err := createScalesGetter(mgr)
if err != nil {
setupLog.Error(err, "unable to create scale client")
os.Exit(1)
Expand All @@ -350,7 +363,7 @@ func main() {
Recorder: mgr.GetEventRecorderFor("dynamographdeployment"),
Config: ctrlConfig,
DockerSecretRetriever: dockerSecretRetriever,
ScaleClient: scalesGetter,
ScaleClient: scaleClient,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DynamoGraphDeployment")
os.Exit(1)
Expand Down
13 changes: 8 additions & 5 deletions deploy/cloud/operator/internal/controller_common/scale.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,22 @@ func ScaleResource(ctx context.Context, scaleClient scale.ScalesGetter, gvr sche
logger := log.FromContext(ctx)
logger.Info("Scaling resource", "gvr", gvr, "name", name, "namespace", namespace, "replicas", replicas)

// Get current scale to check if scaling is needed
if scaleClient == nil {
logger.Error(nil, "Scale client is nil")
return fmt.Errorf("scale client is nil")
}

currentScale, err := scaleClient.Scales(namespace).Get(ctx, gvr.GroupResource(), name, metav1.GetOptions{})
if err != nil {
logger.Error(err, "Failed to get current scale", "gvr", gvr, "name", name)
return fmt.Errorf("failed to get current scale for %s %s: %w", gvr.Resource, name, err)
logger.Error(err, "Failed to get current scale - resource may not support scale subresource", "gvr", gvr, "name", name, "namespace", namespace, "groupResource", gvr.GroupResource())
return fmt.Errorf("failed to get current scale for %s %s (resource may not support scale subresource): %w", gvr.Resource, name, err)
}

// Check if scaling is needed
if currentScale.Spec.Replicas == replicas {
logger.V(1).Info("Resource already at desired replica count", "gvr", gvr, "name", name, "replicas", replicas)
return nil
}

// Update scale
scaleObj := &autoscalingv1.Scale{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Expand All @@ -57,6 +59,7 @@ func ScaleResource(ctx context.Context, scaleClient scale.ScalesGetter, gvr sche
},
}

logger.V(1).Info("Updating scale", "gvr", gvr, "name", name, "newReplicas", replicas)
_, err = scaleClient.Scales(namespace).Update(ctx, gvr.GroupResource(), scaleObj, metav1.UpdateOptions{})
if err != nil {
logger.Error(err, "Failed to update scale", "gvr", gvr, "name", name, "replicas", replicas)
Expand Down