Make things more cluster-agnostic: only pass --gpus-per-node when the Slurm config lists gpu in GresTypes

Signed-off-by: jthomson04 <[email protected]>
ai-dynamo · jthomson04 · Nov 26, 2025 · Nov 27, 2025 · Nov 27, 2025 · Nov 28, 2025
commit 8b977dc27d5f4a19ad0ccbf4133cb47b60f40ecd
diff --git a/components/backends/trtllm/performance_sweeps/benchmark_disagg.slurm b/components/backends/trtllm/performance_sweeps/benchmark_disagg.slurm
@@ -162,6 +162,12 @@ if [ -z "$DECODE_COUNT" ]; then
 fi
 echo "Decode Count: $DECODE_COUNT"
 
+if [[ -n $(scontrol show config | grep "GresTypes" | grep "gpu") ]]; then
+    gpus_per_node="--gpus-per-node ${ntasks_per_node}"
+else
+    gpus_per_node=""
+fi
+
 num_gen_nodes=$((gen_nodes/num_gen_servers))
 for ((i=1; i<=DECODE_COUNT; i++)); do
   echo "Running Decode Worker: ${i}"
@@ -176,10 +182,9 @@ for ((i=1; i<=DECODE_COUNT; i++)); do
       --container-mounts=${container_mounts} \
       --mpi=pmix \
       -w ${decode_nodes_csv} \
-      --nodes ${num_gen_nodes} \
+      --nodes ${num_gen_nodes} $gpus_per_node \
       --ntasks $gen_tp_size \
       --oversubscribe \
-      --gpus-per-node $ntasks_per_node \
       --overlap \
       -e UCX_NET_DEVICES,TRTLLM_UCX_INTERFACE \
       bash ${SCRIPTS_DIR}/scripts/start_disagg_worker.sh ${full_logdir}/decode_config.yaml ${ctx_gpus} ${served_model_name} ${model_path} 'decode' $gen_enable_attention_dp &> ${full_logdir}/output_decode_worker_${i}.log &
@@ -206,8 +211,7 @@ for ((i=1; i<=PREFILL_COUNT; i++)); do
         --oversubscribe \
         --overlap \
         --ntasks $(( ctx_tp_size < ntasks_per_node ? ctx_tp_size : ntasks_per_node )) \
-        --gpus-per-node $ntasks_per_node \
-        --nodes 1 \
+        --nodes 1 $gpus_per_node \
         -e UCX_NET_DEVICES,TRTLLM_UCX_INTERFACE \
         bash ${SCRIPTS_DIR}/scripts/start_disagg_worker.sh ${full_logdir}/prefill_config.yaml ${ctx_gpus} ${served_model_name} ${model_path} 'prefill' $ctx_enable_attention_dp &> ${full_logdir}/output_prefill_worker_${i}.log &
   prefill_pids+=($!)

diff --git a/components/backends/trtllm/performance_sweeps/submit_disagg.sh b/components/backends/trtllm/performance_sweeps/submit_disagg.sh
@@ -98,8 +98,15 @@ run_single() {
     gen_nodes=$(((gen_tp_size + NTASKS_PER_NODE - 1)/NTASKS_PER_NODE * gen_num))
     total_nodes=$((ctx_num + gen_nodes))
     total_tasks=$((total_nodes * NTASKS_PER_NODE))
+
+    if [[ -n $(scontrol show config | grep "GresTypes" | grep "gpu") ]]; then
+        gpus_per_node="--gpus-per-node ${NTASKS_PER_NODE}"
+    else
+        gpus_per_node=""
+    fi
+
     set -x
-    sbatch --nodes=${total_nodes} --gpus-per-node ${NTASKS_PER_NODE} --ntasks=${total_tasks} --ntasks-per-node=${NTASKS_PER_NODE} --segment=${total_nodes} ${slurm_args} benchmark_disagg.slurm ${ctx_num} ${ctx_tp_size} ${ctx_ep_size} ${ctx_enable_attention_dp} 30 20000 ${gen_num} ${gen_tp_size} ${gen_ep_size} ${gen_batch_size} ${gen_max_num_tokens} ${gen_enable_attention_dp} ${gen_gpu_memory_fraction} ${gen_eplb_num_slots} ${gen_mtp_size} "${gen_concurrency_list}" ${gen_nodes} ${kind} ${MODEL_PATH} ${SERVED_MODEL_NAME} ${IMAGE} ${ISL} ${OSL} ${BENCHMARK_KIND} ${NTASKS_PER_NODE}
+    sbatch --nodes=${total_nodes} ${gpus_per_node} --ntasks=${total_tasks} --ntasks-per-node=${NTASKS_PER_NODE} --segment=${total_nodes} ${slurm_args} benchmark_disagg.slurm ${ctx_num} ${ctx_tp_size} ${ctx_ep_size} ${ctx_enable_attention_dp} 30 20000 ${gen_num} ${gen_tp_size} ${gen_ep_size} ${gen_batch_size} ${gen_max_num_tokens} ${gen_enable_attention_dp} ${gen_gpu_memory_fraction} ${gen_eplb_num_slots} ${gen_mtp_size} "${gen_concurrency_list}" ${gen_nodes} ${kind} ${MODEL_PATH} ${SERVED_MODEL_NAME} ${IMAGE} ${ISL} ${OSL} ${BENCHMARK_KIND} ${NTASKS_PER_NODE}
     set +x
 }