@@ -15,6 +15,16 @@ const (
1515
1616type SGLangBackend struct {}
1717
18+ // isPythonCommand checks if the command is a Python interpreter
19+ func isPythonCommand (cmd string ) bool {
20+ if cmd == "python" || cmd == "python3" {
21+ return true
22+ }
23+ // Match python with version numbers like python3.11, python2.7, etc.
24+ matched , _ := regexp .MatchString (`^python\d+(\.\d+)*$` , cmd )
25+ return matched
26+ }
27+
1828func (b * SGLangBackend ) UpdateContainer (container * corev1.Container , numberOfNodes int32 , role Role , component * v1alpha1.DynamoComponentDeploymentOverridesSpec , serviceName string , multinodeDeployer MultinodeDeployer ) {
1929 // For single node, nothing to do
2030 if numberOfNodes <= 1 {
@@ -29,32 +39,83 @@ func (b *SGLangBackend) UpdateContainer(container *corev1.Container, numberOfNod
2939 }
3040
3141 // Generate the flags to add
32- flags := b .getMultinodeFlags (numberOfNodes , role , serviceName , multinodeDeployer )
42+ flags , needsShell := b .getMultinodeFlags (numberOfNodes , role , serviceName , multinodeDeployer )
3343 if flags == "" {
3444 return
3545 }
3646
37- // Flatten all args into a single command and inject flags
38- if len (container .Args ) > 0 {
39- fullCommand := strings .Join (container .Args , " " )
40- modifiedCommand := b .injectFlagsIntoPythonCommand (fullCommand , flags )
41- container .Args = []string {modifiedCommand }
47+ /*
48+ * Flag Injection Strategy for Multinode SGLang Deployments
49+ *
50+ * This code handles the injection of distributed training flags (--dist-init-addr, --nnodes, --node-rank)
51+ * into container commands for multinode SGLang deployments. The complexity arises from supporting multiple
52+ * container command patterns and ensuring proper environment variable interpretation.
53+ *
54+ * Two main scenarios are handled:
55+ *
56+ * 1. Direct Python Command (e.g., Command: ["python3"], Args: ["-m", "sglang", "..."])
57+ * - If shell interpretation is needed (for env vars): Wrap in "sh -c" with exec
58+ * - If no shell needed: Simply append flags to the Args array
59+ *
60+ * 2. Non-Python Command (e.g., Command: ["sh"], Args: ["-c", "python3 -m sglang ..."])
61+ * - Use regex-based injection to find embedded Python+SGLang commands within args
62+ * - Insert flags after the Python command but before any shell operators (|, &, ;)
63+ *
64+ * The needsShell flag indicates when environment variables require shell interpretation
65+ */
66+ if len (container .Command ) > 0 && isPythonCommand (container .Command [0 ]) {
67+ // Direct python command case
68+ if needsShell {
69+ // Transform to shell wrapper for env var interpretation
70+ fullCommand := strings .Join (container .Command , " " )
71+ originalArgs := strings .Join (container .Args , " " )
72+ var shellCommand string
73+ if len (container .Args ) > 0 {
74+ // Use exec to ensure PID 1 is given to the python command
75+ shellCommand = fmt .Sprintf ("exec %s %s %s" , fullCommand , originalArgs , flags )
76+ } else {
77+ // Use exec to ensure PID 1 is given to the python command
78+ shellCommand = fmt .Sprintf ("exec %s %s" , fullCommand , flags )
79+ }
80+ container .Command = []string {"sh" , "-c" }
81+ container .Args = []string {shellCommand }
82+ } else {
83+ // Simple append to args
84+ flagsSlice := strings .Fields (flags )
85+ container .Args = append (container .Args , flagsSlice ... )
86+ }
87+ } else {
88+ // Non-python command case - try injection on each arg individually
89+ for i , arg := range container .Args {
90+ modifiedArg := b .injectFlagsIntoPythonCommand (arg , flags )
91+ if modifiedArg != arg { // flags were successfully injected
92+ container .Args [i ] = modifiedArg
93+ break // stop after first successful injection
94+ }
95+ }
4296 }
4397}
4498
4599func (b * SGLangBackend ) UpdatePodSpec (podSpec * corev1.PodSpec , numberOfNodes int32 , role Role , component * v1alpha1.DynamoComponentDeploymentOverridesSpec , serviceName string ) {
46100 // do nothing
47101}
48102
49- // getMultinodeFlags returns the multinode flags as a single string
50- func (b * SGLangBackend ) getMultinodeFlags (numberOfNodes int32 , role Role , serviceName string , multinodeDeployer MultinodeDeployer ) string {
103+ // getMultinodeFlags returns the multinode flags and whether shell interpretation is needed
104+ func (b * SGLangBackend ) getMultinodeFlags (numberOfNodes int32 , role Role , serviceName string , multinodeDeployer MultinodeDeployer ) ( string , bool ) {
51105 distInitAddr := fmt .Sprintf ("%s:%s" , multinodeDeployer .GetLeaderHostname (serviceName ), SglangPort )
52- nodeRank := multinodeDeployer .GetNodeRank ()
53- // Determine node-rank
106+
107+ var nodeRank string
108+ var needsShell bool
109+
54110 if role == RoleLeader {
55111 nodeRank = "0"
112+ needsShell = false
113+ } else {
114+ nodeRank , needsShell = multinodeDeployer .GetNodeRank ()
56115 }
57- return fmt .Sprintf ("--dist-init-addr %s --nnodes %d --node-rank %s" , distInitAddr , numberOfNodes , nodeRank )
116+
117+ flags := fmt .Sprintf ("--dist-init-addr %s --nnodes %d --node-rank %s" , distInitAddr , numberOfNodes , nodeRank )
118+ return flags , needsShell
58119}
59120
60121// injectFlagsIntoPythonCommand finds python sglang commands and adds flags after them
0 commit comments