Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
fix: increase shm default size and make it configurable
  • Loading branch information
julienmancuso committed Aug 21, 2025
commit 74aaf239466419e2cdf5ea79c86ea2844814f324
Original file line number Diff line number Diff line change
Expand Up @@ -10241,6 +10241,18 @@ spec:
serviceName:
description: contains the name of the component
type: string
sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
properties:
disabled:
type: boolean
size:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
type: object
status:
description: Status reflects the current observed state of the component deployment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10340,6 +10340,18 @@ spec:
serviceName:
description: contains the name of the component
type: string
sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
properties:
disabled:
type: boolean
size:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
type: object
description: |-
Services allows per-service overrides of the component deployment settings.
Expand Down
5 changes: 5 additions & 0 deletions deploy/cloud/operator/api/v1alpha1/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,8 @@ type Autoscaling struct {
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
}

// SharedMemorySpec configures the memory-backed volume that the operator
// mounts at /dev/shm in the component's container.
type SharedMemorySpec struct {
// Disabled turns the /dev/shm volume off: when true, neither the
// shared-memory volume nor its mount is generated.
Disabled bool `json:"disabled,omitempty"`
// Size is the size limit applied to the /dev/shm volume. A zero (unset)
// quantity selects the 8Gi default.
Size resource.Quantity `json:"size,omitempty"`
}
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ type DynamoComponentDeploymentSharedSpec struct {
// Ingress config to expose the component outside the cluster (or through a service mesh).
Ingress *IngressSpec `json:"ingress,omitempty"`

// SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
SharedMemory *SharedMemorySpec `json:"sharedMemory,omitempty"`

// +optional
// ExtraPodMetadata adds labels/annotations to the created Pods.
ExtraPodMetadata *dynamoCommon.ExtraPodMetadata `json:"extraPodMetadata,omitempty"`
Expand Down
21 changes: 21 additions & 0 deletions deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -10241,6 +10241,18 @@ spec:
serviceName:
description: contains the name of the component
type: string
sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
properties:
disabled:
type: boolean
size:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
type: object
status:
description: Status reflects the current observed state of the component deployment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10340,6 +10340,18 @@ spec:
serviceName:
description: contains the name of the component
type: string
sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
properties:
disabled:
type: boolean
size:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
type: object
description: |-
Services allows per-service overrides of the component deployment settings.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(5*1024*1024*1024, resource.BinarySI), // 5gi (calculated from memory limit / 4)
SizeLimit: resource.NewQuantity(8*1024*1024*1024, resource.BinarySI),
},
},
},
Expand Down Expand Up @@ -893,7 +893,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(5*1024*1024*1024, resource.BinarySI), // 5gi (calculated from memory limit / 4)
SizeLimit: resource.NewQuantity(8*1024*1024*1024, resource.BinarySI),
},
},
},
Expand Down
36 changes: 15 additions & 21 deletions deploy/cloud/operator/internal/dynamo/graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -778,9 +778,10 @@ func GenerateBasePodSpec(
MountPath: *component.PVC.MountPoint,
})
}
shmVolume, shmVolumeMount := generateSharedMemoryVolumeAndMount(&container.Resources)
volumes = append(volumes, shmVolume)
container.VolumeMounts = append(container.VolumeMounts, shmVolumeMount)
if shmVol, shmMount := generateSharedMemoryVolumeAndMount(component.SharedMemory); shmVol != nil && shmMount != nil {
volumes = append(volumes, *shmVol)
container.VolumeMounts = append(container.VolumeMounts, *shmMount)
}

// Apply backend-specific container modifications
multinodeDeployer := MultinodeDeployerFactory(multinodeDeploymentType)
Expand Down Expand Up @@ -1179,36 +1180,29 @@ func GenerateBasePodSpecForController(
return podSpec, nil
}

func generateSharedMemoryVolumeAndMount(resources *corev1.ResourceRequirements) (corev1.Volume, corev1.VolumeMount) {
sharedMemorySizeLimit := resource.MustParse("512Mi")
// Check if we have memory limits to work with
memoryLimit := resources.Limits[corev1.ResourceMemory]
if !memoryLimit.IsZero() {
// Use 1/4 of memory limit
calculatedSize := resource.NewQuantity(memoryLimit.Value()/4, resource.BinarySI)
// Apply bounds: minimum 512Mi, maximum 8Gi
minSize := resource.MustParse("512Mi")
maxSize := resource.MustParse("8Gi")

if calculatedSize.Cmp(minSize) > 0 && calculatedSize.Cmp(maxSize) < 0 {
sharedMemorySizeLimit = *calculatedSize
} else if calculatedSize.Cmp(maxSize) >= 0 {
sharedMemorySizeLimit = maxSize // Cap at maximum
func generateSharedMemoryVolumeAndMount(spec *v1alpha1.SharedMemorySpec) (*corev1.Volume, *corev1.VolumeMount) {
// default: enabled=true, size=8Gi
size := resource.MustParse("8Gi")
if spec != nil {
if spec.Disabled {
return nil, nil
}
if !spec.Size.IsZero() {
size = spec.Size
}
// If calculatedSize < minSize, keep the 512Mi base
}
volume := corev1.Volume{
Name: commonconsts.KubeValueNameSharedMemory,
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: &sharedMemorySizeLimit,
SizeLimit: &size,
},
},
}
volumeMount := corev1.VolumeMount{
Name: commonconsts.KubeValueNameSharedMemory,
MountPath: "/dev/shm",
}
return volume, volumeMount
return &volume, &volumeMount
}
20 changes: 10 additions & 10 deletions deploy/cloud/operator/internal/dynamo/graph_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1235,7 +1235,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(536870912, resource.BinarySI),
SizeLimit: resource.NewQuantity(8*1024*1024*1024, resource.BinarySI),
},
},
},
Expand Down Expand Up @@ -1378,7 +1378,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(536870912, resource.BinarySI),
SizeLimit: resource.NewQuantity(8*1024*1024*1024, resource.BinarySI),
},
},
},
Expand Down Expand Up @@ -1733,7 +1733,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: resource.NewQuantity(8*1024*1024*1024, resource.BinarySI),
},
},
},
Expand Down Expand Up @@ -1883,7 +1883,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: resource.NewQuantity(8*1024*1024*1024, resource.BinarySI),
},
},
},
Expand Down Expand Up @@ -1989,7 +1989,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: resource.NewQuantity(8*1024*1024*1024, resource.BinarySI),
},
},
},
Expand Down Expand Up @@ -2134,7 +2134,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: resource.NewQuantity(8*1024*1024*1024, resource.BinarySI),
},
},
},
Expand Down Expand Up @@ -2509,7 +2509,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: resource.NewQuantity(8*1024*1024*1024, resource.BinarySI),
},
},
},
Expand Down Expand Up @@ -2648,7 +2648,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: resource.NewQuantity(8*1024*1024*1024, resource.BinarySI),
},
},
},
Expand Down Expand Up @@ -2755,7 +2755,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: resource.NewQuantity(8*1024*1024*1024, resource.BinarySI),
},
},
},
Expand Down Expand Up @@ -2899,7 +2899,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: resource.NewQuantity(8*1024*1024*1024, resource.BinarySI),
},
},
},
Expand Down
Loading