Skip to content
Merged
10 changes: 6 additions & 4 deletions benchmarks/router/run_engines.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
# SPDX-License-Identifier: Apache-2.0

# Parse command-line arguments
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
NUM_WORKERS=8
MODEL_PATH="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
RECIPE_PATH="$DYNAMO_HOME/recipes/deepseek-r1-distill-llama-8b/trtllm"
TENSOR_PARALLEL_SIZE=1
DATA_PARALLEL_SIZE=1
USE_MOCKERS=false
Expand Down Expand Up @@ -84,13 +86,13 @@ if [ ${#EXTRA_ARGS[@]} -eq 0 ]; then
)
elif [ "$USE_TRTLLM" = true ]; then
# Default args for TensorRT-LLM engine using predefined YAML configs
# Config files located at: ../../components/backends/trtllm/engine_configs/{agg,decode,prefill}.yaml
# Config files located at: $RECIPE_PATH/{agg,decode,prefill}.yaml
if [ "$MODE" = "prefill" ]; then
ENGINE_CONFIG="../../components/backends/trtllm/engine_configs/prefill.yaml"
ENGINE_CONFIG="$RECIPE_PATH/prefill.yaml"
elif [ "$MODE" = "decode" ]; then
ENGINE_CONFIG="../../components/backends/trtllm/engine_configs/decode.yaml"
ENGINE_CONFIG="$RECIPE_PATH/decode.yaml"
else
ENGINE_CONFIG="../../components/backends/trtllm/engine_configs/agg.yaml"
ENGINE_CONFIG="$RECIPE_PATH/agg.yaml"
fi

EXTRA_ARGS=(
Expand Down
4 changes: 2 additions & 2 deletions components/backends/trtllm/deploy/agg-with-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ spec:
# mount the configmap as a volume
volumeMounts:
- name: nvidia-config
mountPath: /workspace/components/backends/trtllm/engine_configs
mountPath: /workspace/
readOnly: true
command:
- python3
Expand All @@ -67,4 +67,4 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/agg.yaml
- ./recipes/qwen3/trtllm/agg.yaml
4 changes: 2 additions & 2 deletions components/backends/trtllm/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/
command:
- python3
- -m
Expand All @@ -36,4 +36,4 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/agg.yaml
- ./recipes/qwen3/trtllm/agg.yaml
4 changes: 2 additions & 2 deletions components/backends/trtllm/deploy/agg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/
command:
- python3
- -m
Expand All @@ -39,5 +39,5 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/agg.yaml
- ./recipes/qwen3/trtllm/agg.yaml
- --publish-events-and-metrics
12 changes: 6 additions & 6 deletions components/backends/trtllm/deploy/disagg-multinode.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,10 @@ spec:
mainContainer:
volumeMounts:
- name: nvidia-config
mountPath: /workspace/components/backends/trtllm/engine_configs
mountPath: /workspace/
readOnly: true
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/
command:
- python3
- -m
Expand All @@ -139,7 +139,7 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/prefill.yaml
- ./recipes/qwen3/trtllm/prefill.yaml
- --disaggregation-mode
- prefill
- --disaggregation-strategy
Expand All @@ -165,10 +165,10 @@ spec:
mainContainer:
volumeMounts:
- name: nvidia-config
mountPath: /workspace/components/backends/trtllm/engine_configs
mountPath: /workspace/
readOnly: true
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/
command:
- python3
- -m
Expand All @@ -179,7 +179,7 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/decode.yaml
- ./recipes/qwen3/trtllm/decode.yaml
- --disaggregation-mode
- decode
- --disaggregation-strategy
Expand Down
8 changes: 4 additions & 4 deletions components/backends/trtllm/deploy/disagg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/
command:
- python3
- -m
Expand All @@ -37,7 +37,7 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/prefill.yaml
- ./recipes/qwen3/trtllm/prefill.yaml
- --disaggregation-mode
- prefill
- --disaggregation-strategy
Expand All @@ -54,7 +54,7 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/
command:
- python3
- -m
Expand All @@ -65,7 +65,7 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/decode.yaml
- ./recipes/qwen3/trtllm/decode.yaml
- --disaggregation-mode
- decode
- --disaggregation-strategy
Expand Down
8 changes: 4 additions & 4 deletions components/backends/trtllm/deploy/disagg_planner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ spec:
terminationGracePeriodSeconds: 600
mainContainer:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/
command:
- python3
args:
Expand All @@ -97,7 +97,7 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/decode.yaml
- ./recipes/qwen3/trtllm/decode.yaml
- --disaggregation-mode
- decode
- --disaggregation-strategy
Expand All @@ -115,7 +115,7 @@ spec:
terminationGracePeriodSeconds: 600
mainContainer:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/
command:
- python3
args:
Expand All @@ -126,7 +126,7 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/prefill.yaml
- ./recipes/qwen3/trtllm/prefill.yaml
- --disaggregation-mode
- prefill
- --disaggregation-strategy
Expand Down
8 changes: 4 additions & 4 deletions components/backends/trtllm/deploy/disagg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/
command:
- python3
- -m
Expand All @@ -39,7 +39,7 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/prefill.yaml
- ./recipes/qwen3/trtllm/prefill.yaml
- --disaggregation-mode
- prefill
- --disaggregation-strategy
Expand All @@ -56,7 +56,7 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/
command:
- python3
- -m
Expand All @@ -67,7 +67,7 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/decode.yaml
- ./recipes/qwen3/trtllm/decode.yaml
- --disaggregation-mode
- decode
- --disaggregation-strategy
Expand Down
3 changes: 2 additions & 1 deletion components/backends/trtllm/launch/agg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
# SPDX-License-Identifier: Apache-2.0

# Environment variables with defaults
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"engine_configs/agg.yaml"}
export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/agg.yaml"}
export MODALITY=${MODALITY:-"text"}
# If you want to use multimodal, set MODALITY to "multimodal"
#export MODALITY=${MODALITY:-"multimodal"}
Expand Down
3 changes: 2 additions & 1 deletion components/backends/trtllm/launch/agg_metrics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
# SPDX-License-Identifier: Apache-2.0

# Environment variables with defaults
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"engine_configs/agg.yaml"}
export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/agg.yaml"}
export MODALITY=${MODALITY:-"text"}

# Setup cleanup trap
Expand Down
3 changes: 2 additions & 1 deletion components/backends/trtllm/launch/agg_router.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
# SPDX-License-Identifier: Apache-2.0

# Environment variables with defaults
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"engine_configs/agg.yaml"}
export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/agg.yaml"}

# Setup cleanup trap
cleanup() {
Expand Down
5 changes: 3 additions & 2 deletions components/backends/trtllm/launch/disagg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
# SPDX-License-Identifier: Apache-2.0

# Environment variables with defaults
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/decode.yaml"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/decode.yaml"}
export PREFILL_CUDA_VISIBLE_DEVICES=${PREFILL_CUDA_VISIBLE_DEVICES:-"0"}
export DECODE_CUDA_VISIBLE_DEVICES=${DECODE_CUDA_VISIBLE_DEVICES:-"1"}
export MODALITY=${MODALITY:-"text"}
Expand Down
5 changes: 3 additions & 2 deletions components/backends/trtllm/launch/disagg_router.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
# SPDX-License-Identifier: Apache-2.0

# Environment variables with defaults
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"prefill_first"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/decode.yaml"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/decode.yaml"}
export PREFILL_CUDA_VISIBLE_DEVICES=${PREFILL_CUDA_VISIBLE_DEVICES:-"0"}
export DECODE_CUDA_VISIBLE_DEVICES=${DECODE_CUDA_VISIBLE_DEVICES:-"1"}

Expand Down
7 changes: 4 additions & 3 deletions components/backends/trtllm/launch/epd_disagg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
# SPDX-License-Identifier: Apache-2.0

# Environment variables with defaults
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen2-VL-7B-Instruct"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen2-VL-7B-Instruct"}
export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/decode.yaml"}
export ENCODE_ENGINE_ARGS=${ENCODE_ENGINE_ARGS:-"engine_configs/encode.yaml"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen2-vl-7b-instruct/trtllm/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen2-vl-7b-instruct/trtllm/decode.yaml"}
export ENCODE_ENGINE_ARGS=${ENCODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen2-vl-7b-instruct/trtllm/encode.yaml"}
export PREFILL_CUDA_VISIBLE_DEVICES=${PREFILL_CUDA_VISIBLE_DEVICES:-"0"}
export DECODE_CUDA_VISIBLE_DEVICES=${DECODE_CUDA_VISIBLE_DEVICES:-"1"}
export ENCODE_CUDA_VISIBLE_DEVICES=${ENCODE_CUDA_VISIBLE_DEVICES:-"2"}
Expand Down
5 changes: 3 additions & 2 deletions components/backends/trtllm/launch/gpt_oss_disagg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
# SPDX-License-Identifier: Apache-2.0

# Environment variables with defaults
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
export MODEL_PATH=${MODEL_PATH:-"/model"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"openai/gpt-oss-120b"}
export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"prefill_first"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/gpt_oss/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/gpt_oss/decode.yaml"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/gpt-oss-120b/trtllm/disagg/decode.yaml"}

set -e
trap 'echo Cleaning up...; kill 0' EXIT
Expand Down
1 change: 1 addition & 0 deletions container/Dockerfile.trtllm
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ COPY examples /workspace/examples
COPY benchmarks /workspace/benchmarks
COPY deploy /workspace/deploy
COPY components/ /workspace/components/
COPY recipes/ /workspace/recipes/

# Copy attribution files
COPY ATTRIBUTION* LICENSE /workspace/
Expand Down
2 changes: 1 addition & 1 deletion docs/backends/trtllm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ cd $DYNAMO_HOME/components/backends/trtllm
```bash
cd $DYNAMO_HOME/components/backends/trtllm

export AGG_ENGINE_ARGS=./engine_configs/deepseek_r1/mtp/mtp_agg.yaml
export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/deepseek-r1/trtllm/mtp/mtp_agg.yaml
export SERVED_MODEL_NAME="nvidia/DeepSeek-R1-FP4"
# nvidia/DeepSeek-R1-FP4 is a large model
export MODEL_PATH="nvidia/DeepSeek-R1-FP4"
Expand Down
12 changes: 6 additions & 6 deletions docs/backends/trtllm/gemma3_sliding_window_attention.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ VSWA is a mechanism in which a model’s layers alternate between multiple slidi
cd $DYNAMO_HOME/components/backends/trtllm
export MODEL_PATH=google/gemma-3-1b-it
export SERVED_MODEL_NAME=$MODEL_PATH
export AGG_ENGINE_ARGS=engine_configs/gemma3/vswa_agg.yaml
export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml
./launch/agg.sh
```

Expand All @@ -39,7 +39,7 @@ export AGG_ENGINE_ARGS=engine_configs/gemma3/vswa_agg.yaml
cd $DYNAMO_HOME/components/backends/trtllm
export MODEL_PATH=google/gemma-3-1b-it
export SERVED_MODEL_NAME=$MODEL_PATH
export AGG_ENGINE_ARGS=engine_configs/gemma3/vswa_agg.yaml
export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml
./launch/agg_router.sh
```

Expand All @@ -48,8 +48,8 @@ export AGG_ENGINE_ARGS=engine_configs/gemma3/vswa_agg.yaml
cd $DYNAMO_HOME/components/backends/trtllm
export MODEL_PATH=google/gemma-3-1b-it
export SERVED_MODEL_NAME=$MODEL_PATH
export PREFILL_ENGINE_ARGS=engine_configs/gemma3/vswa_prefill.yaml
export DECODE_ENGINE_ARGS=engine_configs/gemma3/vswa_decode.yaml
export PREFILL_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_prefill.yaml
export DECODE_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_decode.yaml
./launch/disagg.sh
```

Expand All @@ -58,7 +58,7 @@ export DECODE_ENGINE_ARGS=engine_configs/gemma3/vswa_decode.yaml
cd $DYNAMO_HOME/components/backends/trtllm
export MODEL_PATH=google/gemma-3-1b-it
export SERVED_MODEL_NAME=$MODEL_PATH
export PREFILL_ENGINE_ARGS=engine_configs/gemma3/vswa_prefill.yaml
export DECODE_ENGINE_ARGS=engine_configs/gemma3/vswa_decode.yaml
export PREFILL_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_prefill.yaml
export DECODE_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_decode.yaml
./launch/disagg_router.sh
```
Loading
Loading