Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deploy/cloud/helm/crds/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ apiVersion: v2
name: dynamo-crds
description: A Helm chart for dynamo CRDs
type: application
version: 0.4.0
version: 0.4.1
dependencies: []
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,12 @@ spec:
minReplicas:
type: integer
type: object
backendFramework:
enum:
- sglang
- vllm
- trtllm
type: string
componentType:
type: string
dynamoComponent:
Expand Down Expand Up @@ -5039,6 +5045,8 @@ spec:
type: string
memory:
type: string
nodes:
type: string
type: object
requests:
properties:
Expand All @@ -5052,6 +5060,8 @@ spec:
type: string
memory:
type: string
nodes:
type: string
type: object
type: object
runMode:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ spec:
type: object
spec:
properties:
backendFramework:
enum:
- sglang
- vllm
- trtllm
type: string
dynamoGraph:
type: string
envs:
Expand Down Expand Up @@ -5094,6 +5100,8 @@ spec:
type: string
memory:
type: string
nodes:
type: string
type: object
requests:
properties:
Expand All @@ -5107,6 +5115,8 @@ spec:
type: string
memory:
type: string
nodes:
type: string
type: object
type: object
runMode:
Expand Down
1 change: 1 addition & 0 deletions deploy/cloud/operator/api/dynamo/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ type ResourceItem struct {
CPU string `json:"cpu,omitempty"`
Memory string `json:"memory,omitempty"`
GPU string `json:"gpu,omitempty"`
Nodes string `json:"nodes,omitempty"`
Custom map[string]string `json:"custom,omitempty"`
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ type DynamoComponentDeploymentSpec struct {
// contains the tag of the DynamoComponent: for example, "my_package:MyService"
DynamoTag string `json:"dynamoTag,omitempty"`

// BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm")
// +kubebuilder:validation:Enum=sglang;vllm;trtllm
BackendFramework string `json:"backendFramework,omitempty"`

DynamoComponentDeploymentSharedSpec `json:",inline"`
}

Expand Down Expand Up @@ -110,6 +114,13 @@ type IngressSpec struct {
IngressControllerClassName *string `json:"ingressControllerClassName,omitempty"`
}

// IsVirtualServiceEnabled reports whether this ingress spec asks for a
// virtual service. It returns true only when the receiver is non-nil, both
// Enabled and UseVirtualService are set, and VirtualServiceGateway is non-nil.
// Calling it on a nil *IngressSpec is safe and yields false.
func (i *IngressSpec) IsVirtualServiceEnabled() bool {
	// Any missing prerequisite disables the virtual service.
	if i == nil || !i.Enabled || !i.UseVirtualService {
		return false
	}
	return i.VirtualServiceGateway != nil
}

// DynamoComponentDeploymentStatus defines the observed state of DynamoComponentDeployment
type DynamoComponentDeploymentStatus struct {
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
Expand Down Expand Up @@ -195,11 +206,3 @@ func (s *DynamoComponentDeployment) SetDynamoDeploymentConfig(config []byte) {
Value: string(config),
})
}

// GetImage returns the image set on the main container of the extra pod spec.
// It returns the empty string when either Spec.ExtraPodSpec or its
// MainContainer is nil.
func (s *DynamoComponentDeployment) GetImage() string {
	extra := s.Spec.ExtraPodSpec
	if extra == nil || extra.MainContainer == nil {
		return ""
	}
	return extra.MainContainer.Image
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ type DynamoGraphDeploymentSpec struct {
// Environment variables to be set in the deployment
// +kubebuilder:validation:Optional
Envs []corev1.EnvVar `json:"envs,omitempty"`
// BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm")
// +kubebuilder:validation:Enum=sglang;vllm;trtllm
BackendFramework string `json:"backendFramework,omitempty"`
}

// DynamoGraphDeploymentStatus defines the observed state of DynamoGraphDeployment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,12 @@ spec:
minReplicas:
type: integer
type: object
backendFramework:
enum:
- sglang
- vllm
- trtllm
type: string
componentType:
type: string
dynamoComponent:
Expand Down Expand Up @@ -5039,6 +5045,8 @@ spec:
type: string
memory:
type: string
nodes:
type: string
type: object
requests:
properties:
Expand All @@ -5052,6 +5060,8 @@ spec:
type: string
memory:
type: string
nodes:
type: string
type: object
type: object
runMode:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ spec:
type: object
spec:
properties:
backendFramework:
enum:
- sglang
- vllm
- trtllm
type: string
dynamoGraph:
type: string
envs:
Expand Down Expand Up @@ -5094,6 +5100,8 @@ spec:
type: string
memory:
type: string
nodes:
type: string
type: object
requests:
properties:
Expand All @@ -5107,6 +5115,8 @@ spec:
type: string
memory:
type: string
nodes:
type: string
type: object
type: object
runMode:
Expand Down
2 changes: 1 addition & 1 deletion deploy/cloud/operator/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ toolchain go1.24.3

require (
emperror.dev/errors v0.8.1
github.com/NVIDIA/grove/operator/api v0.0.0-20250717114148-daac6e53774f
github.com/NVIDIA/grove/operator/api v0.0.0-20250801123021-8b42bac59ef2
github.com/bsm/gomega v1.27.10
github.com/google/go-cmp v0.7.0
github.com/imdario/mergo v0.3.6
Expand Down
4 changes: 2 additions & 2 deletions deploy/cloud/operator/go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
emperror.dev/errors v0.8.1 h1:UavXZ5cSX/4u9iyvH6aDcuGkVjeexUGJ7Ij7G4VfQT0=
emperror.dev/errors v0.8.1/go.mod h1:YcRvLPh626Ubn2xqtoprejnA5nFha+TJ+2vew48kWuE=
github.com/NVIDIA/grove/operator/api v0.0.0-20250717114148-daac6e53774f h1:2ePSNDm7/Tep8F99yCQVH8/vmn86L1cUzTbVlyNopmQ=
github.com/NVIDIA/grove/operator/api v0.0.0-20250717114148-daac6e53774f/go.mod h1:nJL33lsBe+9xCcZLYkNYg1wucE4hJfa4ZfHm1zamuG0=
github.com/NVIDIA/grove/operator/api v0.0.0-20250801123021-8b42bac59ef2 h1:JLOj0GiubP3VlR0okIbuqljvl+e2Vccnu6LX6wL34G0=
github.com/NVIDIA/grove/operator/api v0.0.0-20250801123021-8b42bac59ef2/go.mod h1:QlsR2wQLj9m/zVEqv5SsCPzyjN2ykYZ0r/NEnDf4WB4=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
Expand Down
16 changes: 16 additions & 0 deletions deploy/cloud/operator/internal/consts/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ const (
DynamoSystemPort = 9090
DynamoSystemPortName = "system"

MpiRunSshPort = 2222

EnvDynamoServicePort = "DYNAMO_PORT"

KubeLabelDynamoSelector = "nvidia.com/selector"
Expand Down Expand Up @@ -47,4 +49,18 @@ const (
// Metrics related constants
KubeAnnotationEnableMetrics = "nvidia.com/enable-metrics" // User-provided annotation to control metrics
KubeLabelMetricsEnabled = "nvidia.com/metrics-enabled" // Controller-managed label for pod selection
KubeValueNameSharedMemory = "shared-memory"

// Grove multinode role suffixes
GroveRoleSuffixLeader = "ldr"
GroveRoleSuffixWorker = "wkr"

MpiRunSshSecretName = "mpi-run-ssh-secret"
)

// MultinodeDeploymentType is a string-backed identifier for a multinode
// deployment type. The values declared for it here are "grove" and "lws".
type MultinodeDeploymentType string

// Declared MultinodeDeploymentType values.
const (
	// MultinodeDeploymentTypeGrove is the "grove" deployment type.
	MultinodeDeploymentTypeGrove = MultinodeDeploymentType("grove")
	// MultinodeDeploymentTypeLWS is the "lws" deployment type.
	MultinodeDeploymentTypeLWS = MultinodeDeploymentType("lws")
)
Loading
Loading