Skip to content
Merged
16 changes: 16 additions & 0 deletions docs/documents/apis.md
Original file line number Diff line number Diff line change
Expand Up @@ -2435,6 +2435,22 @@ Kubernetes core/v1.ResourceList
</tr>
<tr>
<td>
<code>virtualCapacity</code>
</td>
<td>
<em>
<a href="https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#resourcelist-v1-core">
Kubernetes core/v1.ResourceList
</a>
</em>
</td>
<td>
<em>(Optional)</em>
<p>VirtualCapacity represents the expected Node &lsquo;virtual&rsquo; capacity ie comprising virtual extended resources.</p>
</td>
</tr>
<tr>
<td>
<code>instanceType</code>
</td>
<td>
Expand Down
10 changes: 10 additions & 0 deletions kubernetes/crds/machine.sapcloud.io_machineclasses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,16 @@ spec:
region:
description: Region of the expected node belonging to nodeGroup
type: string
virtualCapacity:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: VirtualCapacity represents the expected Node 'virtual'
capacity ie comprising virtual extended resources.
type: object
zone:
description: Zone of the expected node belonging to nodeGroup
type: string
Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/machine/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,10 @@ type NodeTemplate struct {
// Capacity contains subfields to track all node resources required to scale nodegroup from zero
Capacity corev1.ResourceList

// VirtualCapacity represents the expected Node 'virtual' capacity ie comprising virtual extended resources.
// +optional
VirtualCapacity corev1.ResourceList

// Instance type of the node belonging to nodeGroup
InstanceType string

Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/machine/v1alpha1/machineclass_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ type NodeTemplate struct {
// Capacity contains subfields to track all node resources required to scale nodegroup from zero
Capacity corev1.ResourceList `json:"capacity"`

// VirtualCapacity represents the expected Node 'virtual' capacity ie comprising virtual extended resources.
// +optional
VirtualCapacity corev1.ResourceList `json:"virtualCapacity,omitempty"`

// Instance type of the node belonging to nodeGroup
InstanceType string `json:"instanceType"`

Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/machine/v1alpha1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/apis/machine/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/apis/machine/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions pkg/openapi/openapi_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pkg/util/provider/machinecontroller/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp
return retry, err
}

retry, err = c.syncMachineNodeTemplates(ctx, machine)
retry, err = c.syncNodeTemplates(ctx, machine, machineClass)
if err != nil {
return retry, err
}
Expand Down
107 changes: 91 additions & 16 deletions pkg/util/provider/machinecontroller/machine_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -486,14 +486,17 @@ func (c *controller) updateMachineStatusAndNodeCondition(ctx context.Context, ma
return machineutils.ShortRetry, err
}

// syncMachineNodeTemplate syncs nodeTemplates between machine and corresponding node-object.
// It ensures, that any nodeTemplate element available on Machine should be available on node-object.
// syncNodeTemplates syncs nodeTemplates between machine, machineClass and corresponding node-object.
// It ensures that any nodeTemplate element available on Machine should be available on node-object.
// It ensures that MachineClass.NodeTemplate.VirtualCapacity is synced to the Node's Capacity.
// Any other elements already present on the node-object are left untouched.
func (c *controller) syncMachineNodeTemplates(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) {
func (c *controller) syncNodeTemplates(ctx context.Context, machine *v1alpha1.Machine, machineClass *v1alpha1.MachineClass) (machineutils.RetryPeriod, error) {
var (
initializedNodeAnnotation bool
currentlyAppliedALTJSONByte []byte
lastAppliedALT v1alpha1.NodeTemplateSpec
initializedNodeAnnotation bool
currentlyAppliedALTJSONByte []byte
lastAppliedALT v1alpha1.NodeTemplateSpec
currentlyAppliedVirtualCapacityJSONByte []byte
lastAppliedVirtualCapacity v1.ResourceList
)

node, err := c.nodeLister.Get(getNodeName(machine))
Expand Down Expand Up @@ -524,10 +527,30 @@ func (c *controller) syncMachineNodeTemplates(ctx context.Context, machine *v1al
}
}

lastAppliedVirtualCapacityJSONString, exists := node.Annotations[machineutils.LastAppliedVirtualCapacityAnnotation]
if exists {
err = json.Unmarshal([]byte(lastAppliedVirtualCapacityJSONString), &lastAppliedVirtualCapacity)
if err != nil {
klog.Errorf("Error occurred while syncing node virtual capacity: %s", err)
return machineutils.ShortRetry, err
}
}

annotationsChanged := SyncMachineAnnotations(machine, nodeCopy, lastAppliedALT.Annotations)
labelsChanged := SyncMachineLabels(machine, nodeCopy, lastAppliedALT.Labels)
taintsChanged := SyncMachineTaints(machine, nodeCopy, lastAppliedALT.Spec.Taints)

var virtualCapacityChanged bool
var desiredVirtualCapacity v1.ResourceList
if machineClass != nil && machineClass.NodeTemplate != nil {
desiredVirtualCapacity = machineClass.NodeTemplate.VirtualCapacity
virtualCapacityChanged = SyncVirtualCapacity(desiredVirtualCapacity, nodeCopy, lastAppliedVirtualCapacity)
}

if !initializedNodeAnnotation && !annotationsChanged && !labelsChanged && !taintsChanged && !virtualCapacityChanged {
return machineutils.LongRetry, nil
}

// Update node-object with latest nodeTemplate elements if elements have changed.
if initializedNodeAnnotation || labelsChanged || annotationsChanged || taintsChanged {

Expand All @@ -548,23 +571,44 @@ func (c *controller) syncMachineNodeTemplates(ctx context.Context, machine *v1al
return machineutils.ShortRetry, err
}
nodeCopy.Annotations[machineutils.LastAppliedALTAnnotation] = string(currentlyAppliedALTJSONByte)
}

_, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{})
if virtualCapacityChanged {
klog.V(3).Infof("virtualCapacity changed, attempting UpdateStatus for node.Status.Capacity of node %q to %v", nodeCopy.Name, nodeCopy.Status.Capacity)
// Must update the Node's status subresource, because capacity lives under status.
nodeUpdated, err := c.targetCoreClient.CoreV1().Nodes().UpdateStatus(ctx, nodeCopy, metav1.UpdateOptions{})
if err != nil {
// Keep retrying until update goes through
klog.Errorf("Updated failed for node object of machine %q. Retrying, error: %q", machine.Name, err)
klog.Errorf("UpdateStatus failed for node %q of machine %q. error: %q", node.Name, machine.Name, err)
return machineutils.ShortRetry, err
}
klog.V(3).Infof("node.Status.Capacity of node %q updated to: %v", node.Name, nodeUpdated.Status.Capacity)
currentlyAppliedVirtualCapacityJSONByte, err = json.Marshal(desiredVirtualCapacity)
if err != nil {
klog.Errorf("Error occurred while syncing node virtual capacity of node %q: %v", node.Name, err)
return machineutils.ShortRetry, err
}
nodeCopy = nodeUpdated.DeepCopy()
if len(desiredVirtualCapacity) == 0 {
delete(nodeCopy.Annotations, machineutils.LastAppliedVirtualCapacityAnnotation)
} else {
// Return error to continue in next reconcile
err = errSuccessfulALTsync
nodeCopy.Annotations[machineutils.LastAppliedVirtualCapacityAnnotation] = string(currentlyAppliedVirtualCapacityJSONByte)
}
}

if apierrors.IsConflict(err) {
return machineutils.ConflictRetry, err
}
return machineutils.ShortRetry, err
_, err = c.targetCoreClient.CoreV1().Nodes().Update(ctx, nodeCopy, metav1.UpdateOptions{})
if err != nil {
// Keep retrying until update goes through
klog.Errorf("Updated failed for node object of machine %q. Retrying, error: %q", machine.Name, err)
} else {
// Return error to continue in next reconcile
err = errSuccessfulALTsync
}

return machineutils.LongRetry, nil
if apierrors.IsConflict(err) {
return machineutils.ConflictRetry, err
}
return machineutils.ShortRetry, err

}

// SyncMachineAnnotations syncs the annotations of the machine with node-objects.
Expand Down Expand Up @@ -719,6 +763,37 @@ func SyncMachineTaints(
return toBeUpdated
}

// SyncVirtualCapacity syncs the MachineClass.NodeTemplate.VirtualCapacity with the Node.Status.Capacity
// It returns true if update is needed else false.
func SyncVirtualCapacity(desiredVirtualCapacity v1.ResourceList, node *v1.Node, lastAppliedVirtualCapacity v1.ResourceList) bool {
toBeUpdated := false

if node.Status.Capacity == nil {
node.Status.Capacity = v1.ResourceList{}
}
if desiredVirtualCapacity == nil {
desiredVirtualCapacity = v1.ResourceList{}
}

// Delete any keys that existed in the past but has been deleted now
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Delete any keys that existed in the past but has been deleted now
// Delete any keys that existed in the past but have been removed now

for prevKey := range lastAppliedVirtualCapacity {
if _, exists := desiredVirtualCapacity[prevKey]; !exists {
delete(node.Status.Capacity, prevKey)
toBeUpdated = true
}
}

// Add/Update any key that doesn't exist or whose value as changed
for targKey, targQuant := range desiredVirtualCapacity {
if nodeQuant, exists := node.Status.Capacity[targKey]; !exists || !nodeQuant.Equal(targQuant) {
node.Status.Capacity[targKey] = targQuant
toBeUpdated = true
}
}

return toBeUpdated
}

// machineCreateErrorHandler updates the machine status based on
// CreateMachineResponse and the error during the machine creation
func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1alpha1.Machine, createMachineResponse *driver.CreateMachineResponse, err error) (machineutils.RetryPeriod, error) {
Expand Down
Loading