Skip to content
Prev Previous commit
Next Next commit
Fix the TRTLLM engine config locations and references
  • Loading branch information
tanmayv25 committed Nov 12, 2025
commit a49a4342e737757a4670c217a4f71733e6286a4f
10 changes: 5 additions & 5 deletions benchmarks/router/run_engines.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
NUM_WORKERS=8
MODEL_PATH="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
RECIPE_PATH="$DYNAMO_HOME/recipes/deepseek-r1-distill-llama-8b/trtllm"
ENGINE_CONFIG_PATH="$DYNAMO_HOME/examples/backends/trtllm/engine_configs/deepseek-r1-distill-llama-8b"
TENSOR_PARALLEL_SIZE=1
DATA_PARALLEL_SIZE=1
USE_MOCKERS=false
Expand Down Expand Up @@ -86,13 +86,13 @@ if [ ${#EXTRA_ARGS[@]} -eq 0 ]; then
)
elif [ "$USE_TRTLLM" = true ]; then
# Default args for TensorRT-LLM engine using predefined YAML configs
# Config files located at: $RECIPE_PATH/{agg,decode,prefill}.yaml
# Config files located at: $ENGINE_CONFIG_PATH/{agg,decode,prefill}.yaml
if [ "$MODE" = "prefill" ]; then
ENGINE_CONFIG="$RECIPE_PATH/prefill.yaml"
ENGINE_CONFIG="$ENGINE_CONFIG_PATH/prefill.yaml"
elif [ "$MODE" = "decode" ]; then
ENGINE_CONFIG="$RECIPE_PATH/decode.yaml"
ENGINE_CONFIG="$ENGINE_CONFIG_PATH/decode.yaml"
else
ENGINE_CONFIG="$RECIPE_PATH/agg.yaml"
ENGINE_CONFIG="$ENGINE_CONFIG_PATH/agg.yaml"
fi

EXTRA_ARGS=(
Expand Down
2 changes: 1 addition & 1 deletion docs/backends/trtllm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ cd $DYNAMO_HOME/examples/backends/trtllm
```bash
cd $DYNAMO_HOME/examples/backends/trtllm

export AGG_ENGINE_ARGS=./recipes/deepseek-r1/trtllm/agg/mtp/mtp_agg.yaml
export AGG_ENGINE_ARGS=./engine_configs/deepseek-r1/agg/mtp/mtp_agg.yaml
export SERVED_MODEL_NAME="nvidia/DeepSeek-R1-FP4"
# nvidia/DeepSeek-R1-FP4 is a large model
export MODEL_PATH="nvidia/DeepSeek-R1-FP4"
Expand Down
12 changes: 6 additions & 6 deletions docs/backends/trtllm/gemma3_sliding_window_attention.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ VSWA is a mechanism in which a model’s layers alternate between multiple slidi
cd $DYNAMO_HOME/examples/backends/trtllm
export MODEL_PATH=google/gemma-3-1b-it
export SERVED_MODEL_NAME=$MODEL_PATH
export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml
export AGG_ENGINE_ARGS=$DYNAMO_HOME/examples/backends/trtllm/engine_configs/gemma3/vswa_agg.yaml
./launch/agg.sh
```

Expand All @@ -39,7 +39,7 @@ export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml
cd $DYNAMO_HOME/examples/backends/trtllm
export MODEL_PATH=google/gemma-3-1b-it
export SERVED_MODEL_NAME=$MODEL_PATH
export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml
export AGG_ENGINE_ARGS=$DYNAMO_HOME/examples/backends/trtllm/engine_configs/gemma3/vswa_agg.yaml
./launch/agg_router.sh
```

Expand All @@ -48,8 +48,8 @@ export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml
cd $DYNAMO_HOME/examples/backends/trtllm
export MODEL_PATH=google/gemma-3-1b-it
export SERVED_MODEL_NAME=$MODEL_PATH
export PREFILL_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_prefill.yaml
export DECODE_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_decode.yaml
export PREFILL_ENGINE_ARGS=$DYNAMO_HOME/examples/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml
export DECODE_ENGINE_ARGS=$DYNAMO_HOME/examples/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml
./launch/disagg.sh
```

Expand All @@ -58,7 +58,7 @@ export DECODE_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_decode.yaml
cd $DYNAMO_HOME/examples/backends/trtllm
export MODEL_PATH=google/gemma-3-1b-it
export SERVED_MODEL_NAME=$MODEL_PATH
export PREFILL_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_prefill.yaml
export DECODE_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_decode.yaml
export PREFILL_ENGINE_ARGS=$DYNAMO_HOME/examples/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml
export DECODE_ENGINE_ARGS=$DYNAMO_HOME/examples/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml
./launch/disagg_router.sh
```
8 changes: 4 additions & 4 deletions docs/backends/trtllm/gpt-oss.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,14 @@ The deployment uses configuration files and command-line arguments to control be

#### Configuration Files

**Prefill Configuration (`recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml`)**:
**Prefill Configuration (`examples/backends/trtllm/engine_configs/gpt-oss-120b/prefill.yaml`)**:
- `enable_attention_dp: false` - Attention data parallelism disabled for prefill
- `enable_chunked_prefill: true` - Enables efficient chunked prefill processing
- `moe_config.backend: CUTLASS` - Uses optimized CUTLASS kernels for MoE layers
- `cache_transceiver_config.backend: ucx` - Uses UCX for efficient KV cache transfer
- `cuda_graph_config.max_batch_size: 32` - Maximum batch size for CUDA graphs

**Decode Configuration (`recipes/gpt-oss-120b/trtllm/disagg/decode.yaml`)**:
**Decode Configuration (`examples/backends/trtllm/engine_configs/gpt-oss-120b/decode.yaml`)**:
- `enable_attention_dp: true` - Attention data parallelism enabled for decode
- `disable_overlap_scheduler: false` - Enables overlapping for decode efficiency
- `moe_config.backend: CUTLASS` - Uses optimized CUTLASS kernels for MoE layers
Expand Down Expand Up @@ -145,7 +145,7 @@ python3 -m dynamo.frontend --router-mode round-robin --http-port 8000 &
CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m dynamo.trtllm \
--model-path /model \
--served-model-name openai/gpt-oss-120b \
--extra-engine-args recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml \
--extra-engine-args examples/backends/trtllm/engine_configs/gpt-oss-120b/prefill.yaml \
--dyn-reasoning-parser gpt_oss \
--dyn-tool-call-parser harmony \
--disaggregation-mode prefill \
Expand All @@ -161,7 +161,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m dynamo.trtllm \
CUDA_VISIBLE_DEVICES=4,5,6,7 python3 -m dynamo.trtllm \
--model-path /model \
--served-model-name openai/gpt-oss-120b \
--extra-engine-args recipes/gpt-oss-120b/trtllm/disagg/decode.yaml \
--extra-engine-args examples/backends/trtllm/engine_configs/gpt-oss-120b/decode.yaml \
--dyn-reasoning-parser gpt_oss \
--dyn-tool-call-parser harmony \
--disaggregation-mode decode \
Expand Down
8 changes: 4 additions & 4 deletions docs/backends/trtllm/llama4_plus_eagle.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ This guide demonstrates how to deploy Llama 4 Maverick Instruct with Eagle Specu
- The other node runs the prefill worker.

## Notes
* Make sure the (`eagle3_one_model: true`) is set in the LLM API config inside the `recipes/llama4/trtllm/eagle` folder.
* Make sure `eagle3_one_model: true` is set in the LLM API config inside the `examples/backends/trtllm/engine_configs/llama4/eagle` folder.

## Setup

Expand All @@ -52,17 +52,17 @@ See [this](./multinode/multinode-examples.md#setup) section from multinode guide
## Aggregated Serving
```bash
export NUM_NODES=1
export ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/eagle/eagle_agg.yml"
export ENGINE_CONFIG="/mnt/examples/backends/trtllm/engine_configs/llama4/eagle/eagle_agg.yml"
./multinode/srun_aggregated.sh
```

## Disaggregated Serving

```bash
export NUM_PREFILL_NODES=1
export PREFILL_ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/eagle/eagle_prefill.yaml"
export PREFILL_ENGINE_CONFIG="/mnt/examples/backends/trtllm/engine_configs/llama4/eagle/eagle_prefill.yml"
export NUM_DECODE_NODES=1
export DECODE_ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/eagle/eagle_decode.yaml"
export DECODE_ENGINE_CONFIG="/mnt/examples/backends/trtllm/engine_configs/llama4/eagle/eagle_decode.yml"
./multinode/srun_disaggregated.sh
```

Expand Down
6 changes: 3 additions & 3 deletions docs/backends/trtllm/multimodal_support.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Here are quick steps to launch Llama-4 Maverick BF16 in aggregated mode
```bash
cd $DYNAMO_HOME

export AGG_ENGINE_ARGS=./recipes/llama4/trtllm/multimodal/agg.yaml
export AGG_ENGINE_ARGS=./examples/backends/trtllm/engine_configs/llama4/multimodal/agg.yaml
export SERVED_MODEL_NAME="meta-llama/Llama-4-Maverick-17B-128E-Instruct"
export MODEL_PATH="meta-llama/Llama-4-Maverick-17B-128E-Instruct"
./launch/agg.sh
Expand Down Expand Up @@ -79,8 +79,8 @@ cd $DYNAMO_HOME

export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen2-VL-7B-Instruct"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen2-VL-7B-Instruct"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"recipes/qwen2-vl-7b-instruct/trtllm/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"recipes/qwen2-vl-7b-instruct/trtllm/decode.yaml"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"examples/backends/trtllm/engine_configs/qwen2-vl-7b-instruct/decode.yaml"}
export MODALITY=${MODALITY:-"multimodal"}

./launch/disagg.sh
Expand Down
6 changes: 3 additions & 3 deletions docs/backends/trtllm/multinode/multinode-examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ follow these steps below to launch an **aggregated** deployment across 4 nodes:

```bash
# Default set in srun_aggregated.sh, but can customize here.
# export ENGINE_CONFIG="/mnt/recipes/deepseek-r1/trtllm/agg/wide_ep/wide_ep_agg.yaml"
# export ENGINE_CONFIG="/mnt/examples/backends/trtllm/engine_configs/deepseek-r1/agg/wide_ep/wide_ep_agg.yaml"

# Customize NUM_NODES to match the desired parallelism in ENGINE_CONFIG
# The product of NUM_NODES*NUM_GPUS_PER_NODE should match the number of
Expand Down Expand Up @@ -165,8 +165,8 @@ deployment across 8 nodes:

```bash
# Defaults set in srun_disaggregated.sh, but can customize here.
# export PREFILL_ENGINE_CONFIG="/mnt/recipes/deepseek-r1/trtllm/disagg/wide_ep/wide_ep_prefill.yaml"
# export DECODE_ENGINE_CONFIG="/mnt/recipes/deepseek-r1/trtllm/disagg/wide_ep/wide_ep_decode.yaml"
# export PREFILL_ENGINE_CONFIG="/mnt/examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/wide_ep_prefill.yaml"
# export DECODE_ENGINE_CONFIG="/mnt/examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/wide_ep_decode.yaml"

# Customize NUM_PREFILL_NODES to match the desired parallelism in PREFILL_ENGINE_CONFIG
# Customize NUM_DECODE_NODES to match the desired parallelism in DECODE_ENGINE_CONFIG
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ limitations under the License.
>
> Before running the deployment, you must update the engine configuration files to change `backend: DEFAULT` to `backend: default` (lowercase). Run the following command:
> ```bash
> sed -i 's/backend: DEFAULT/backend: default/g' /mnt/recipes/llama4/trtllm/multimodal/prefill.yaml /mnt/recipes/llama4/trtllm/multimodal/decode.yaml
> sed -i 's/backend: DEFAULT/backend: default/g' /mnt/examples/backends/trtllm/engine_configs/llama4/multimodal/prefill.yaml /mnt/examples/backends/trtllm/engine_configs/llama4/multimodal/decode.yaml
> ```


Expand Down Expand Up @@ -100,8 +100,8 @@ deployment across 4 nodes:

```bash
# Defaults set in srun_disaggregated.sh, but can customize here.
# export PREFILL_ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/multimodal/prefill.yaml"
# export DECODE_ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/multimodal/decode.yaml"
# export PREFILL_ENGINE_CONFIG="/mnt/examples/backends/trtllm/engine_configs/llama4/multimodal/prefill.yaml"
# export DECODE_ENGINE_CONFIG="/mnt/examples/backends/trtllm/engine_configs/llama4/multimodal/decode.yaml"

# Customize NUM_PREFILL_NODES to match the desired parallelism in PREFILL_ENGINE_CONFIG
# Customize NUM_DECODE_NODES to match the desired parallelism in DECODE_ENGINE_CONFIG
Expand Down
2 changes: 1 addition & 1 deletion docs/kubernetes/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ args:
- python3 -m dynamo.trtllm
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
--extra-engine-args /workspace/recipes/deepseek-r1-distill-llama-8b/agg.yaml
--extra-engine-args /workspace/examples/backends/trtllm/engine_configs/deepseek-r1-distill-llama-8b/agg.yaml
```

Key customization points include:
Expand Down
2 changes: 1 addition & 1 deletion examples/backends/trtllm/deploy/agg-with-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,4 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- ./recipes/qwen3/trtllm/agg.yaml
- ./examples/backends/trtllm/engine_configs/qwen3/agg.yaml
2 changes: 1 addition & 1 deletion examples/backends/trtllm/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- ./recipes/qwen3/trtllm/agg.yaml
- ./examples/backends/trtllm/engine_configs/qwen3/agg.yaml
2 changes: 1 addition & 1 deletion examples/backends/trtllm/deploy/agg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,5 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- ./recipes/qwen3/trtllm/agg.yaml
- ./examples/backends/trtllm/engine_configs/qwen3/agg.yaml
- --publish-events-and-metrics
4 changes: 2 additions & 2 deletions examples/backends/trtllm/deploy/disagg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- ./recipes/qwen3/trtllm/prefill.yaml
- ./examples/backends/trtllm/engine_configs/qwen3/prefill.yaml
- --disaggregation-mode
- prefill
TRTLLMDecodeWorker:
Expand All @@ -63,6 +63,6 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- ./recipes/qwen3/trtllm/decode.yaml
- ./examples/backends/trtllm/engine_configs/qwen3/decode.yaml
- --disaggregation-mode
- decode
4 changes: 2 additions & 2 deletions examples/backends/trtllm/deploy/disagg_planner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- ./recipes/qwen3/trtllm/decode.yaml
- ./examples/backends/trtllm/engine_configs/qwen3/decode.yaml
- --disaggregation-mode
- decode
TRTLLMPrefillWorker:
Expand All @@ -124,6 +124,6 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- ./recipes/qwen3/trtllm/prefill.yaml
- ./examples/backends/trtllm/engine_configs/qwen3/prefill.yaml
- --disaggregation-mode
- prefill
4 changes: 2 additions & 2 deletions examples/backends/trtllm/deploy/disagg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- ./recipes/qwen3/trtllm/prefill.yaml
- ./examples/backends/trtllm/engine_configs/qwen3/prefill.yaml
- --disaggregation-mode
- prefill
- --publish-events-and-metrics
Expand All @@ -65,6 +65,6 @@ spec:
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- ./recipes/qwen3/trtllm/decode.yaml
- ./examples/backends/trtllm/engine_configs/qwen3/decode.yaml
- --disaggregation-mode
- decode
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ moe_config:
# moe_max_num_tokens = max_batch_size * moe_expert_parallel_size
# 4096 = 256 * 16
# moe_max_num_tokens: 4096
load_balancer: /mnt/recipes/deepseek-r1/trtllm/agg/wide_ep/eplb.yaml
load_balancer: /mnt/examples/backends/trtllm/engine_configs/deepseek-r1/agg/wide_ep/eplb.yaml

tensor_parallel_size: 16
moe_expert_parallel_size: 16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ backend: pytorch
# WideEP related settings
moe_config:
backend: WIDEEP
load_balancer: /mnt/recipes/deepseek-r1/trtllm/disagg/wide_ep/eplb.yaml
load_balancer: /mnt/examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml

# TP/EP/PP/DP
tensor_parallel_size: 16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ backend: pytorch
# WideEP related settings
moe_config:
backend: WIDEEP
load_balancer: /mnt/recipes/deepseek-r1/trtllm/disagg/wide_ep/eplb.yaml
load_balancer: /mnt/examples/backends/trtllm/engine_configs/deepseek-r1/disagg/wide_ep/eplb.yaml

# TP/EP/PP/DP
tensor_parallel_size: 16
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
enable_attention_dp: true
disable_overlap_scheduler: false
moe_config:
backend: CUTLASS
cuda_graph_config:
enable_padding: true
cache_transceiver_config:
backend: UCX
max_tokens_in_buffer: 65536
Comment on lines +21 to +23
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Fix YAML indentation inconsistency in cache_transceiver_config.

Lines 22-23 use 2-space indentation while other nested configs (moe_config, cuda_graph_config) use 4-space indentation. Standardize to 4-space indentation for consistency.

 cache_transceiver_config:
-  backend: UCX
-  max_tokens_in_buffer: 65536
+    backend: UCX
+    max_tokens_in_buffer: 65536
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
cache_transceiver_config:
backend: UCX
max_tokens_in_buffer: 65536
cache_transceiver_config:
backend: UCX
max_tokens_in_buffer: 65536
🤖 Prompt for AI Agents
In examples/backends/trtllm/engine_configs/gpt-oss-120b/decode.yaml around lines
21 to 23, the cache_transceiver_config block uses 2-space indentation for its
nested keys while other nested blocks use 4-space indentation; update the nested
lines (backend and max_tokens_in_buffer) to use 4-space indentation so the YAML
nesting matches the rest of the file and remains consistent.

print_iter_log: false
stream_interval: 10
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
enable_attention_dp: false
disable_overlap_scheduler: true
moe_config:
backend: CUTLASS
enable_chunked_prefill: true
cuda_graph_config:
max_batch_size: 32
enable_padding: true
cache_transceiver_config:
backend: UCX
max_tokens_in_buffer: 65536
Comment on lines +23 to +25
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Fix YAML indentation inconsistency in cache_transceiver_config.

Lines 24-25 use 2-space indentation while other nested configs (moe_config, cuda_graph_config) use 4-space indentation. Standardize to 4-space indentation for consistency.

 cache_transceiver_config:
-  backend: UCX
-  max_tokens_in_buffer: 65536
+    backend: UCX
+    max_tokens_in_buffer: 65536
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
cache_transceiver_config:
backend: UCX
max_tokens_in_buffer: 65536
cache_transceiver_config:
backend: UCX
max_tokens_in_buffer: 65536
🤖 Prompt for AI Agents
In examples/backends/trtllm/engine_configs/gpt-oss-120b/prefill.yaml around
lines 23 to 25, the cache_transceiver_config block uses 2-space indentation for
its nested keys while other nested configs use 4 spaces; update the indentation
of the keys under cache_transceiver_config (backend and max_tokens_in_buffer) to
use 4 spaces so the file uses a consistent 4-space indentation for nested
configuration blocks.

print_iter_log: false
stream_interval: 10
2 changes: 1 addition & 1 deletion examples/backends/trtllm/launch/agg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/agg.yaml"}
export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/examples/backends/trtllm/engine_configs/qwen3/agg.yaml"}
export MODALITY=${MODALITY:-"text"}
# If you want to use multimodal, set MODALITY to "multimodal"
#export MODALITY=${MODALITY:-"multimodal"}
Expand Down
2 changes: 1 addition & 1 deletion examples/backends/trtllm/launch/agg_metrics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/agg.yaml"}
export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/examples/backends/trtllm/engine_configs/qwen3/agg.yaml"}
export MODALITY=${MODALITY:-"text"}

# Setup cleanup trap
Expand Down
2 changes: 1 addition & 1 deletion examples/backends/trtllm/launch/agg_router.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/agg.yaml"}
export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/examples/backends/trtllm/engine_configs/qwen3/agg.yaml"}

# Setup cleanup trap
cleanup() {
Expand Down
4 changes: 2 additions & 2 deletions examples/backends/trtllm/launch/disagg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/decode.yaml"}
export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/examples/backends/trtllm/engine_configs/qwen3/prefill.yaml"}
export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/examples/backends/trtllm/engine_configs/qwen3/decode.yaml"}
export PREFILL_CUDA_VISIBLE_DEVICES=${PREFILL_CUDA_VISIBLE_DEVICES:-"0"}
export DECODE_CUDA_VISIBLE_DEVICES=${DECODE_CUDA_VISIBLE_DEVICES:-"1"}
export MODALITY=${MODALITY:-"text"}
Expand Down
Loading
Loading