From 95a7fbeccbf299b3b25fe0f01a99b99568ea020c Mon Sep 17 00:00:00 2001
From: Anant Sharma
Date: Tue, 21 Oct 2025 07:02:44 -0700
Subject: [PATCH 1/8] refactor: move engine configs out of components directory

Signed-off-by: Anant Sharma
---
 .../trtllm/engine_configs/multimodal/agg.yaml | 33 -------------------
 .../engine_configs/multimodal/decode.yaml     | 29 ----------------
 .../engine_configs/multimodal/prefill.yaml    | 31 -----------------
 .../backends/trtllm/launch/gpt_oss_disagg.sh  |  4 +--
 .../trtllm/multinode/srun_aggregated.sh       |  6 ++--
 .../trtllm/multinode/srun_disaggregated.sh    | 10 +++---
 docs/backends/trtllm/README.md                |  2 +-
 .../trtllm/gemma3_sliding_window_attention.md | 12 +++----
 docs/backends/trtllm/gpt-oss.md               |  8 ++---
 docs/backends/trtllm/llama4_plus_eagle.md     |  8 ++---
 docs/backends/trtllm/multimodal_support.md    |  6 ++--
 .../trtllm/multinode/multinode-examples.md    |  6 ++--
 .../multinode/multinode-multimodal-example.md |  6 ++--
 .../scripts}/start_frontend_services.sh       |  0
 .../scripts}/start_trtllm_worker.sh           |  0
 .../deepseek-r1/trtllm}/mtp/mtp_agg.yaml      |  0
 .../deepseek-r1/trtllm}/mtp/mtp_decode.yaml   |  0
 .../deepseek-r1/trtllm}/mtp/mtp_prefill.yaml  |  0
 .../deepseek-r1/trtllm}/simple/agg.yaml       |  0
 .../deepseek-r1/trtllm}/simple/decode.yaml    |  0
 .../deepseek-r1/trtllm}/simple/prefill.yaml   |  0
 .../trtllm}/wide_ep/dep16_agg.yaml            |  0
 .../deepseek-r1/trtllm}/wide_ep/eplb.yaml     |  0
 .../trtllm}/wide_ep/wide_ep_agg.yaml          |  2 +-
 .../trtllm}/wide_ep/wide_ep_decode.yaml       |  2 +-
 .../trtllm}/wide_ep/wide_ep_prefill.yaml      |  2 +-
 .../gemma3/trtllm}/vswa_agg.yaml              |  0
 .../gemma3/trtllm}/vswa_decode.yaml           |  0
 .../gemma3/trtllm}/vswa_prefill.yaml          |  0
 .../gpt-oss-120b/trtllm/disagg}/decode.yaml   |  0
 .../gpt-oss-120b/trtllm/disagg}/prefill.yaml  |  0
 .../llama4/trtllm}/eagle/eagle_agg.yml        |  0
 .../llama4/trtllm}/eagle/eagle_decode.yaml    |  0
 .../llama4/trtllm}/eagle/eagle_prefill.yaml   |  0
 .../llama4/trtllm/multimodal}/decode.yaml     |  0
 .../llama4/trtllm/multimodal}/prefill.yaml    |  0
 36 files changed, 37 insertions(+), 130 deletions(-)
 delete mode 100644 components/backends/trtllm/engine_configs/multimodal/agg.yaml
 delete mode 100644 components/backends/trtllm/engine_configs/multimodal/decode.yaml
 delete mode 100644 components/backends/trtllm/engine_configs/multimodal/prefill.yaml
 rename {components/backends/trtllm/multinode => examples/multimodal/scripts}/start_frontend_services.sh (100%)
 rename {components/backends/trtllm/multinode => examples/multimodal/scripts}/start_trtllm_worker.sh (100%)
 rename {components/backends/trtllm/engine_configs/deepseek_r1 => recipes/deepseek-r1/trtllm}/mtp/mtp_agg.yaml (100%)
 rename {components/backends/trtllm/engine_configs/deepseek_r1 => recipes/deepseek-r1/trtllm}/mtp/mtp_decode.yaml (100%)
 rename {components/backends/trtllm/engine_configs/deepseek_r1 => recipes/deepseek-r1/trtllm}/mtp/mtp_prefill.yaml (100%)
 rename {components/backends/trtllm/engine_configs/deepseek_r1 => recipes/deepseek-r1/trtllm}/simple/agg.yaml (100%)
 rename {components/backends/trtllm/engine_configs/deepseek_r1 => recipes/deepseek-r1/trtllm}/simple/decode.yaml (100%)
 rename {components/backends/trtllm/engine_configs/deepseek_r1 => recipes/deepseek-r1/trtllm}/simple/prefill.yaml (100%)
 rename {components/backends/trtllm/engine_configs/deepseek_r1 => recipes/deepseek-r1/trtllm}/wide_ep/dep16_agg.yaml (100%)
 rename {components/backends/trtllm/engine_configs/deepseek_r1 => recipes/deepseek-r1/trtllm}/wide_ep/eplb.yaml (100%)
 rename {components/backends/trtllm/engine_configs/deepseek_r1 => recipes/deepseek-r1/trtllm}/wide_ep/wide_ep_agg.yaml (92%)
 rename {components/backends/trtllm/engine_configs/deepseek_r1 => recipes/deepseek-r1/trtllm}/wide_ep/wide_ep_decode.yaml (96%)
 rename {components/backends/trtllm/engine_configs/deepseek_r1 => recipes/deepseek-r1/trtllm}/wide_ep/wide_ep_prefill.yaml (95%)
 rename {components/backends/trtllm/engine_configs/gemma3 => recipes/gemma3/trtllm}/vswa_agg.yaml (100%)
 rename {components/backends/trtllm/engine_configs/gemma3 => recipes/gemma3/trtllm}/vswa_decode.yaml (100%)
 rename {components/backends/trtllm/engine_configs/gemma3 => recipes/gemma3/trtllm}/vswa_prefill.yaml (100%)
 rename {components/backends/trtllm/engine_configs/gpt_oss => recipes/gpt-oss-120b/trtllm/disagg}/decode.yaml (100%)
 rename {components/backends/trtllm/engine_configs/gpt_oss => recipes/gpt-oss-120b/trtllm/disagg}/prefill.yaml (100%)
 rename {components/backends/trtllm/engine_configs/llama4 => recipes/llama4/trtllm}/eagle/eagle_agg.yml (100%)
 rename {components/backends/trtllm/engine_configs/llama4 => recipes/llama4/trtllm}/eagle/eagle_decode.yaml (100%)
 rename {components/backends/trtllm/engine_configs/llama4 => recipes/llama4/trtllm}/eagle/eagle_prefill.yaml (100%)
 rename {components/backends/trtllm/engine_configs/multimodal/llama4 => recipes/llama4/trtllm/multimodal}/decode.yaml (100%)
 rename {components/backends/trtllm/engine_configs/multimodal/llama4 => recipes/llama4/trtllm/multimodal}/prefill.yaml (100%)

diff --git a/components/backends/trtllm/engine_configs/multimodal/agg.yaml b/components/backends/trtllm/engine_configs/multimodal/agg.yaml
deleted file mode 100644
index 754f8ce759..0000000000
--- a/components/backends/trtllm/engine_configs/multimodal/agg.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-tensor_parallel_size: 8
-moe_expert_parallel_size: 1
-enable_attention_dp: false
-max_num_tokens: 4096
-max_batch_size: 8
-trust_remote_code: true
-backend: pytorch
-enable_chunked_prefill: true
-
-kv_cache_config:
-  free_gpu_memory_fraction: 0.3
-  enable_block_reuse: false
-
-cache_transceiver_config:
-  backend: DEFAULT
-# NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603
-# NOTE: overlap_scheduler enabled by default since this commit and changed
-# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
-# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
diff --git a/components/backends/trtllm/engine_configs/multimodal/decode.yaml b/components/backends/trtllm/engine_configs/multimodal/decode.yaml
deleted file mode 100644
index 6dbd676ee4..0000000000
--- a/components/backends/trtllm/engine_configs/multimodal/decode.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-tensor_parallel_size: 1
-moe_expert_parallel_size: 1
-enable_attention_dp: false
-max_num_tokens: 8192
-max_batch_size: 16
-trust_remote_code: true
-backend: pytorch
-enable_chunked_prefill: true
-disable_overlap_scheduler: false
-kv_cache_config:
-  free_gpu_memory_fraction: 0.30
-  enable_block_reuse: false
-
-cache_transceiver_config:
-  backend: DEFAULT
\ No newline at end of file
diff --git a/components/backends/trtllm/engine_configs/multimodal/prefill.yaml b/components/backends/trtllm/engine_configs/multimodal/prefill.yaml
deleted file mode 100644
index 83a65e8bf3..0000000000
--- a/components/backends/trtllm/engine_configs/multimodal/prefill.yaml
+++ /dev/null
@@ -1,31 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-tensor_parallel_size: 1
-moe_expert_parallel_size: 1
-enable_attention_dp: false
-max_num_tokens: 8192
-max_batch_size: 16
-trust_remote_code: true
-backend: pytorch
-enable_chunked_prefill: true
-# Overlap scheduler not currently supported in prefill only workers.
-disable_overlap_scheduler: true
-
-kv_cache_config:
-  free_gpu_memory_fraction: 0.30
-  enable_block_reuse: false
-
-cache_transceiver_config:
-  backend: DEFAULT
\ No newline at end of file
diff --git a/components/backends/trtllm/launch/gpt_oss_disagg.sh b/components/backends/trtllm/launch/gpt_oss_disagg.sh
index db42c01771..606ba2a8a2 100755
--- a/components/backends/trtllm/launch/gpt_oss_disagg.sh
+++ b/components/backends/trtllm/launch/gpt_oss_disagg.sh
@@ -6,8 +6,8 @@
 export MODEL_PATH=${MODEL_PATH:-"/model"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"openai/gpt-oss-120b"}
 export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"prefill_first"}
-export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/gpt_oss/prefill.yaml"}
-export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/gpt_oss/decode.yaml"}
+export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml"}
+export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"recipes/gpt-oss-120b/trtllm/disagg/decode.yaml"}
 
 set -e
 trap 'echo Cleaning up...; kill 0' EXIT
diff --git a/components/backends/trtllm/multinode/srun_aggregated.sh b/components/backends/trtllm/multinode/srun_aggregated.sh
index ac1187bf00..e9568db9cd 100755
--- a/components/backends/trtllm/multinode/srun_aggregated.sh
+++ b/components/backends/trtllm/multinode/srun_aggregated.sh
@@ -18,7 +18,7 @@ MOUNTS="${MOUNTS:-${DEFAULT_MOUNT}}"
 NUM_NODES=${NUM_NODES:-4}
 NUM_GPUS_PER_NODE=${NUM_GPUS_PER_NODE:-4}
 
-export ENGINE_CONFIG="${ENGINE_CONFIG:-/mnt/engine_configs/deepseek_r1/wide_ep/wide_ep_agg.yaml}"
+export ENGINE_CONFIG="${ENGINE_CONFIG:-/mnt/recipes/deepseek-r1/trtllm/wide_ep/wide_ep_agg.yaml}"
 
 # Automate settings of certain variables for convenience, but you are free
 # to manually set these for more control as well.
@@ -51,7 +51,7 @@ srun \
   --nodelist "${HEAD_NODE}" \
   --nodes 1 \
   --jobid "${SLURM_JOB_ID}" \
-  /mnt/multinode/start_frontend_services.sh &
+  /mnt/examples/multimodal/scripts/start_frontend_services.sh &
 
 # NOTE: Output streamed to stdout for ease of understanding the example, but
 # in practice you would probably set `srun --output ... --error ...` to pipe
@@ -71,4 +71,4 @@ srun \
   --nodes "${NUM_NODES}" \
   --ntasks-per-node "${NUM_GPUS_PER_NODE}" \
   --jobid "${SLURM_JOB_ID}" \
-  /mnt/multinode/start_trtllm_worker.sh &
\ No newline at end of file
+  /mnt/examples/multimodal/scripts/start_trtllm_worker.sh &
\ No newline at end of file
diff --git a/components/backends/trtllm/multinode/srun_disaggregated.sh b/components/backends/trtllm/multinode/srun_disaggregated.sh
index c8d9ac99cb..fddac1ccbe 100755
--- a/components/backends/trtllm/multinode/srun_disaggregated.sh
+++ b/components/backends/trtllm/multinode/srun_disaggregated.sh
@@ -17,11 +17,11 @@ NUM_GPUS_PER_NODE=${NUM_GPUS_PER_NODE:-4}
 NUM_PREFILL_NODES=${NUM_PREFILL_NODES:-4}
 NUM_PREFILL_WORKERS=${NUM_PREFILL_WORKERS:-1}
-PREFILL_ENGINE_CONFIG="${PREFILL_ENGINE_CONFIG:-/mnt/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml}"
+PREFILL_ENGINE_CONFIG="${PREFILL_ENGINE_CONFIG:-/mnt/recipes/deepseek-r1/trtllm/wide_ep/wide_ep_prefill.yaml}"
 
 NUM_DECODE_NODES=${NUM_DECODE_NODES:-4}
 NUM_DECODE_WORKERS=${NUM_DECODE_WORKERS:-1}
-DECODE_ENGINE_CONFIG="${DECODE_ENGINE_CONFIG:-/mnt/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml}"
+DECODE_ENGINE_CONFIG="${DECODE_ENGINE_CONFIG:-/mnt/recipes/deepseek-r1/trtllm/wide_ep/wide_ep_decode.yaml}"
 
 DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"}
 
@@ -56,7 +56,7 @@ srun \
   --nodelist "${HEAD_NODE}" \
   --nodes 1 \
   --jobid "${SLURM_JOB_ID}" \
-  /mnt/multinode/start_frontend_services.sh &
+  /mnt/examples/multimodal/scripts/start_frontend_services.sh &
 
 # NOTE: Output streamed to stdout for ease of understanding the example, but
 # in practice you would probably set `srun --output ... --error ...` to pipe
@@ -78,7 +78,7 @@ for ((i=1; i<=${NUM_PREFILL_WORKERS}; i++)); do
     --nodes "${NUM_PREFILL_NODES}" \
     --ntasks-per-node "${NUM_GPUS_PER_NODE}" \
     --jobid "${SLURM_JOB_ID}" \
-    /mnt/multinode/start_trtllm_worker.sh &
+    /mnt/examples/multimodal/scripts/start_trtllm_worker.sh &
 done
 
 for ((i=1; i<=${NUM_DECODE_WORKERS}; i++)); do
@@ -98,5 +98,5 @@ for ((i=1; i<=${NUM_DECODE_WORKERS}; i++)); do
     --nodes "${NUM_DECODE_NODES}" \
     --ntasks-per-node "${NUM_GPUS_PER_NODE}" \
    --jobid "${SLURM_JOB_ID}" \
-    /mnt/multinode/start_trtllm_worker.sh &
+    /mnt/examples/multimodal/scripts/start_trtllm_worker.sh &
 done
\ No newline at end of file
diff --git a/docs/backends/trtllm/README.md b/docs/backends/trtllm/README.md
index a7e3107659..a43176e24f 100644
--- a/docs/backends/trtllm/README.md
+++ b/docs/backends/trtllm/README.md
@@ -162,7 +162,7 @@ cd $DYNAMO_HOME/components/backends/trtllm
 ```bash
 cd $DYNAMO_HOME/components/backends/trtllm
-export AGG_ENGINE_ARGS=./engine_configs/deepseek_r1/mtp/mtp_agg.yaml
+export AGG_ENGINE_ARGS=./recipes/deepseek-r1/trtllm/mtp/mtp_agg.yaml
 export SERVED_MODEL_NAME="nvidia/DeepSeek-R1-FP4"
 # nvidia/DeepSeek-R1-FP4 is a large model
 export MODEL_PATH="nvidia/DeepSeek-R1-FP4"
diff --git a/docs/backends/trtllm/gemma3_sliding_window_attention.md b/docs/backends/trtllm/gemma3_sliding_window_attention.md
index 5f9cca904c..5161332205 100644
--- a/docs/backends/trtllm/gemma3_sliding_window_attention.md
+++ b/docs/backends/trtllm/gemma3_sliding_window_attention.md
@@ -30,7 +30,7 @@ VSWA is a mechanism in which a model’s layers alternate between multiple slidi
 cd $DYNAMO_HOME/components/backends/trtllm
 export MODEL_PATH=google/gemma-3-1b-it
 export SERVED_MODEL_NAME=$MODEL_PATH
-export AGG_ENGINE_ARGS=engine_configs/gemma3/vswa_agg.yaml
+export AGG_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_agg.yaml
 ./launch/agg.sh
 ```
@@ -39,7 +39,7 @@ export AGG_ENGINE_ARGS=engine_configs/gemma3/vswa_agg.yaml
 cd $DYNAMO_HOME/components/backends/trtllm
 export MODEL_PATH=google/gemma-3-1b-it
 export SERVED_MODEL_NAME=$MODEL_PATH
-export AGG_ENGINE_ARGS=engine_configs/gemma3/vswa_agg.yaml
+export AGG_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_agg.yaml
 ./launch/agg_router.sh
 ```
@@ -48,8 +48,8 @@ export AGG_ENGINE_ARGS=engine_configs/gemma3/vswa_agg.yaml
 cd $DYNAMO_HOME/components/backends/trtllm
 export MODEL_PATH=google/gemma-3-1b-it
 export SERVED_MODEL_NAME=$MODEL_PATH
-export PREFILL_ENGINE_ARGS=engine_configs/gemma3/vswa_prefill.yaml
-export DECODE_ENGINE_ARGS=engine_configs/gemma3/vswa_decode.yaml
+export PREFILL_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_prefill.yaml
+export DECODE_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_decode.yaml
 ./launch/disagg.sh
 ```
@@ -58,7 +58,7 @@ export DECODE_ENGINE_ARGS=engine_configs/gemma3/vswa_decode.yaml
 cd $DYNAMO_HOME/components/backends/trtllm
 export MODEL_PATH=google/gemma-3-1b-it
 export SERVED_MODEL_NAME=$MODEL_PATH
-export PREFILL_ENGINE_ARGS=engine_configs/gemma3/vswa_prefill.yaml
-export DECODE_ENGINE_ARGS=engine_configs/gemma3/vswa_decode.yaml
+export PREFILL_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_prefill.yaml
+export DECODE_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_decode.yaml
 ./launch/disagg_router.sh
 ```
diff --git a/docs/backends/trtllm/gpt-oss.md b/docs/backends/trtllm/gpt-oss.md
index 9c1f130522..279f2fd1c2 100644
--- a/docs/backends/trtllm/gpt-oss.md
+++ b/docs/backends/trtllm/gpt-oss.md
@@ -90,14 +90,14 @@ The deployment uses configuration files and command-line arguments to control be
 #### Configuration Files
 
-**Prefill Configuration (`engine_configs/gpt_oss/prefill.yaml`)**:
+**Prefill Configuration (`recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml`)**:
 - `enable_attention_dp: false` - Attention data parallelism disabled for prefill
 - `enable_chunked_prefill: true` - Enables efficient chunked prefill processing
 - `moe_config.backend: CUTLASS` - Uses optimized CUTLASS kernels for MoE layers
 - `cache_transceiver_config.backend: ucx` - Uses UCX for efficient KV cache transfer
 - `cuda_graph_config.max_batch_size: 32` - Maximum batch size for CUDA graphs
 
-**Decode Configuration (`engine_configs/gpt_oss/decode.yaml`)**:
+**Decode Configuration (`recipes/gpt-oss-120b/trtllm/disagg/decode.yaml`)**:
 - `enable_attention_dp: true` - Attention data parallelism enabled for decode
 - `disable_overlap_scheduler: false` - Enables overlapping for decode efficiency
 - `moe_config.backend: CUTLASS` - Uses optimized CUTLASS kernels for MoE layers
@@ -147,7 +147,7 @@ python3 -m dynamo.frontend --router-mode round-robin --http-port 8000 &
 CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m dynamo.trtllm \
   --model-path /model \
   --served-model-name openai/gpt-oss-120b \
-  --extra-engine-args engine_configs/gpt_oss/prefill.yaml \
+  --extra-engine-args recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml \
   --dyn-reasoning-parser gpt_oss \
   --dyn-tool-call-parser harmony \
   --disaggregation-mode prefill \
@@ -164,7 +164,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m dynamo.trtllm \
 CUDA_VISIBLE_DEVICES=4,5,6,7 python3 -m dynamo.trtllm \
   --model-path /model \
   --served-model-name openai/gpt-oss-120b \
-  --extra-engine-args engine_configs/gpt_oss/decode.yaml \
+  --extra-engine-args recipes/gpt-oss-120b/trtllm/disagg/decode.yaml \
   --dyn-reasoning-parser gpt_oss \
   --dyn-tool-call-parser harmony \
   --disaggregation-mode decode \
diff --git a/docs/backends/trtllm/llama4_plus_eagle.md b/docs/backends/trtllm/llama4_plus_eagle.md
index 201b185243..f15bfa669f 100644
--- a/docs/backends/trtllm/llama4_plus_eagle.md
+++ b/docs/backends/trtllm/llama4_plus_eagle.md
@@ -30,7 +30,7 @@ This guide demonstrates how to deploy Llama 4 Maverick Instruct with Eagle Specu
 For advanced control over how requests are routed between prefill and decode workers in disaggregated mode, refer to the [Disaggregation Strategy](./README.md#disaggregation-strategy) section.
 
 ## Notes
-* Make sure the (`eagle3_one_model: true`) is set in the LLM API config inside the `engine_configs/llama4/eagle` folder.
+* Make sure the (`eagle3_one_model: true`) is set in the LLM API config inside the `recipes/llama4/trtllm/eagle` folder.
 
 ## Setup
@@ -54,7 +54,7 @@ See [this](./multinode/multinode-examples.md#setup) section from multinode guide
 ## Aggregated Serving
 ```bash
 export NUM_NODES=1
-export ENGINE_CONFIG="/mnt/engine_configs/llama4/eagle/eagle_agg.yaml"
+export ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/eagle/eagle_agg.yml"
 ./multinode/srun_aggregated.sh
 ```
@@ -62,9 +62,9 @@ export ENGINE_CONFIG="/mnt/engine_configs/llama4/eagle/eagle_agg.yaml"
 ```bash
 export NUM_PREFILL_NODES=1
-export PREFILL_ENGINE_CONFIG="/mnt/engine_configs/llama4/eagle/eagle_prefill.yaml"
+export PREFILL_ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/eagle/eagle_prefill.yaml"
 export NUM_DECODE_NODES=1
-export DECODE_ENGINE_CONFIG="/mnt/engine_configs/llama4/eagle/eagle_decode.yaml"
+export DECODE_ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/eagle/eagle_decode.yaml"
 ./multinode/srun_disaggregated.sh
 ```
diff --git a/docs/backends/trtllm/multimodal_support.md b/docs/backends/trtllm/multimodal_support.md
index a8cb246f41..3e6fffc353 100644
--- a/docs/backends/trtllm/multimodal_support.md
+++ b/docs/backends/trtllm/multimodal_support.md
@@ -27,7 +27,7 @@ Here are quick steps to launch Llama-4 Maverick BF16 in aggregated mode
 ```bash
 cd $DYNAMO_HOME/components/backends/trtllm
-export AGG_ENGINE_ARGS=./engine_configs/multinode/agg.yaml
+export AGG_ENGINE_ARGS=./recipes/llama4/trtllm/multimodal/agg.yaml
 export SERVED_MODEL_NAME="meta-llama/Llama-4-Maverick-17B-128E-Instruct"
 export MODEL_PATH="meta-llama/Llama-4-Maverick-17B-128E-Instruct"
 ./launch/agg.sh
@@ -80,8 +80,8 @@ cd $DYNAMO_HOME/components/backends/trtllm
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen2-VL-7B-Instruct"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen2-VL-7B-Instruct"}
 export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"}
-export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/multimodal/prefill.yaml"}
-export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/multimodal/decode.yaml"}
+export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"recipes/llama4/trtllm/multimodal/prefill.yaml"}
+export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"recipes/llama4/trtllm/multimodal/decode.yaml"}
 export MODALITY=${MODALITY:-"multimodal"}
 ./launch/disagg.sh
diff --git a/docs/backends/trtllm/multinode/multinode-examples.md b/docs/backends/trtllm/multinode/multinode-examples.md
index be76bad1ba..622ab10637 100644
--- a/docs/backends/trtllm/multinode/multinode-examples.md
+++ b/docs/backends/trtllm/multinode/multinode-examples.md
@@ -136,7 +136,7 @@ follow these steps below to launch an **aggregated** deployment across 4 nodes:
 ```bash
 # Default set in srun_aggregated.sh, but can customize here.
-# export ENGINE_CONFIG="/mnt/engine_configs/deepseek_r1/wide_ep/wide_ep_agg.yaml"
+# export ENGINE_CONFIG="/mnt/recipes/deepseek-r1/trtllm/wide_ep/wide_ep_agg.yaml"
 
 # Customize NUM_NODES to match the desired parallelism in ENGINE_CONFIG
 # The product of NUM_NODES*NUM_GPUS_PER_NODE should match the number of
@@ -165,8 +165,8 @@ deployment across 8 nodes:
 ```bash
 # Defaults set in srun_disaggregated.sh, but can customize here.
-# export PREFILL_ENGINE_CONFIG="/mnt/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml"
-# export DECODE_ENGINE_CONFIG="/mnt/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml"
+# export PREFILL_ENGINE_CONFIG="/mnt/recipes/deepseek-r1/trtllm/wide_ep/wide_ep_prefill.yaml"
+# export DECODE_ENGINE_CONFIG="/mnt/recipes/deepseek-r1/trtllm/wide_ep/wide_ep_decode.yaml"
 
 # Customize NUM_PREFILL_NODES to match the desired parallelism in PREFILL_ENGINE_CONFIG
 # Customize NUM_DECODE_NODES to match the desired parallelism in DECODE_ENGINE_CONFIG
diff --git a/docs/backends/trtllm/multinode/multinode-multimodal-example.md b/docs/backends/trtllm/multinode/multinode-multimodal-example.md
index fe050efd3c..9546f7a210 100644
--- a/docs/backends/trtllm/multinode/multinode-multimodal-example.md
+++ b/docs/backends/trtllm/multinode/multinode-multimodal-example.md
@@ -34,7 +34,7 @@ limitations under the License.
 >
 > Before running the deployment, you must update the engine configuration files to change `backend: DEFAULT` to `backend: default` (lowercase). Run the following command:
 > ```bash
-> sed -i 's/backend: DEFAULT/backend: default/g' /mnt/engine_configs/multimodal/llama4/prefill.yaml /mnt/engine_configs/multimodal/llama4/decode.yaml
+> sed -i 's/backend: DEFAULT/backend: default/g' /mnt/recipes/llama4/trtllm/multimodal/prefill.yaml /mnt/recipes/llama4/trtllm/multimodal/decode.yaml
 > ```
@@ -100,8 +100,8 @@ deployment across 4 nodes:
 ```bash
 # Defaults set in srun_disaggregated.sh, but can customize here.
-# export PREFILL_ENGINE_CONFIG="/mnt/engine_configs/multimodal/llama4/prefill.yaml"
-# export DECODE_ENGINE_CONFIG="/mnt/engine_configs/multimodal/llama4/decode.yaml"
+# export PREFILL_ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/multimodal/prefill.yaml"
+# export DECODE_ENGINE_CONFIG="/mnt/recipes/llama4/trtllm/multimodal/decode.yaml"
 
 # Customize NUM_PREFILL_NODES to match the desired parallelism in PREFILL_ENGINE_CONFIG
 # Customize NUM_DECODE_NODES to match the desired parallelism in DECODE_ENGINE_CONFIG
diff --git a/components/backends/trtllm/multinode/start_frontend_services.sh b/examples/multimodal/scripts/start_frontend_services.sh
similarity index 100%
rename from components/backends/trtllm/multinode/start_frontend_services.sh
rename to examples/multimodal/scripts/start_frontend_services.sh
diff --git a/components/backends/trtllm/multinode/start_trtllm_worker.sh b/examples/multimodal/scripts/start_trtllm_worker.sh
similarity index 100%
rename from components/backends/trtllm/multinode/start_trtllm_worker.sh
rename to examples/multimodal/scripts/start_trtllm_worker.sh
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_agg.yaml b/recipes/deepseek-r1/trtllm/mtp/mtp_agg.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_agg.yaml
rename to recipes/deepseek-r1/trtllm/mtp/mtp_agg.yaml
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_decode.yaml b/recipes/deepseek-r1/trtllm/mtp/mtp_decode.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_decode.yaml
rename to recipes/deepseek-r1/trtllm/mtp/mtp_decode.yaml
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_prefill.yaml b/recipes/deepseek-r1/trtllm/mtp/mtp_prefill.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_prefill.yaml
rename to recipes/deepseek-r1/trtllm/mtp/mtp_prefill.yaml
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/simple/agg.yaml b/recipes/deepseek-r1/trtllm/simple/agg.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/deepseek_r1/simple/agg.yaml
rename to recipes/deepseek-r1/trtllm/simple/agg.yaml
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/simple/decode.yaml b/recipes/deepseek-r1/trtllm/simple/decode.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/deepseek_r1/simple/decode.yaml
rename to recipes/deepseek-r1/trtllm/simple/decode.yaml
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/simple/prefill.yaml b/recipes/deepseek-r1/trtllm/simple/prefill.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/deepseek_r1/simple/prefill.yaml
rename to recipes/deepseek-r1/trtllm/simple/prefill.yaml
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/dep16_agg.yaml b/recipes/deepseek-r1/trtllm/wide_ep/dep16_agg.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/dep16_agg.yaml
rename to recipes/deepseek-r1/trtllm/wide_ep/dep16_agg.yaml
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/eplb.yaml b/recipes/deepseek-r1/trtllm/wide_ep/eplb.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/eplb.yaml
rename to recipes/deepseek-r1/trtllm/wide_ep/eplb.yaml
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_agg.yaml b/recipes/deepseek-r1/trtllm/wide_ep/wide_ep_agg.yaml
similarity index 92%
rename from components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_agg.yaml
rename to recipes/deepseek-r1/trtllm/wide_ep/wide_ep_agg.yaml
index d32aab2dd3..31d7e395bd 100644
--- a/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_agg.yaml
+++ b/recipes/deepseek-r1/trtllm/wide_ep/wide_ep_agg.yaml
@@ -11,7 +11,7 @@ moe_config:
   # moe_max_num_tokens = max_batch_size * moe_expert_parallel_size
   # 4096 = 256 * 16
   # moe_max_num_tokens: 4096
-  load_balancer: /mnt/engine_configs/deepseek_r1/wide_ep/eplb.yaml
+  load_balancer: /mnt/recipes/deepseek-r1/trtllm/wide_ep/eplb.yaml
 
 tensor_parallel_size: 16
 moe_expert_parallel_size: 16
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml b/recipes/deepseek-r1/trtllm/wide_ep/wide_ep_decode.yaml
similarity index 96%
rename from components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml
rename to recipes/deepseek-r1/trtllm/wide_ep/wide_ep_decode.yaml
index 8f953c6472..6d36ab5ce6 100644
--- a/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml
+++ b/recipes/deepseek-r1/trtllm/wide_ep/wide_ep_decode.yaml
@@ -17,7 +17,7 @@ backend: pytorch
 # WideEP related settings
 moe_config:
   backend: WIDEEP
-  load_balancer: /mnt/engine_configs/deepseek_r1/wide_ep/eplb.yaml
+  load_balancer: /mnt/recipes/deepseek-r1/trtllm/wide_ep/eplb.yaml
 
 # TP/EP/PP/DP
 tensor_parallel_size: 16
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml b/recipes/deepseek-r1/trtllm/wide_ep/wide_ep_prefill.yaml
similarity index 95%
rename from components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml
rename to recipes/deepseek-r1/trtllm/wide_ep/wide_ep_prefill.yaml
index 8a756cc32b..7af74c74ae 100644
--- a/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml
+++ b/recipes/deepseek-r1/trtllm/wide_ep/wide_ep_prefill.yaml
@@ -17,7 +17,7 @@ backend: pytorch
 # WideEP related settings
 moe_config:
   backend: WIDEEP
-  load_balancer: /mnt/engine_configs/deepseek_r1/wide_ep/eplb.yaml
+  load_balancer: /mnt/recipes/deepseek-r1/trtllm/wide_ep/eplb.yaml
 
 # TP/EP/PP/DP
 tensor_parallel_size: 16
diff --git a/components/backends/trtllm/engine_configs/gemma3/vswa_agg.yaml b/recipes/gemma3/trtllm/vswa_agg.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/gemma3/vswa_agg.yaml
rename to recipes/gemma3/trtllm/vswa_agg.yaml
diff --git a/components/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml b/recipes/gemma3/trtllm/vswa_decode.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml
rename to recipes/gemma3/trtllm/vswa_decode.yaml
diff --git a/components/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml b/recipes/gemma3/trtllm/vswa_prefill.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml
rename to recipes/gemma3/trtllm/vswa_prefill.yaml
diff --git a/components/backends/trtllm/engine_configs/gpt_oss/decode.yaml b/recipes/gpt-oss-120b/trtllm/disagg/decode.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/gpt_oss/decode.yaml
rename to recipes/gpt-oss-120b/trtllm/disagg/decode.yaml
diff --git a/components/backends/trtllm/engine_configs/gpt_oss/prefill.yaml b/recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/gpt_oss/prefill.yaml
rename to recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml
diff --git a/components/backends/trtllm/engine_configs/llama4/eagle/eagle_agg.yml b/recipes/llama4/trtllm/eagle/eagle_agg.yml
similarity index 100%
rename from components/backends/trtllm/engine_configs/llama4/eagle/eagle_agg.yml
rename to recipes/llama4/trtllm/eagle/eagle_agg.yml
diff --git a/components/backends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml b/recipes/llama4/trtllm/eagle/eagle_decode.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml
rename to recipes/llama4/trtllm/eagle/eagle_decode.yaml
diff --git a/components/backends/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml b/recipes/llama4/trtllm/eagle/eagle_prefill.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml
rename to recipes/llama4/trtllm/eagle/eagle_prefill.yaml
diff --git a/components/backends/trtllm/engine_configs/multimodal/llama4/decode.yaml b/recipes/llama4/trtllm/multimodal/decode.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/multimodal/llama4/decode.yaml
rename to recipes/llama4/trtllm/multimodal/decode.yaml
diff --git a/components/backends/trtllm/engine_configs/multimodal/llama4/prefill.yaml b/recipes/llama4/trtllm/multimodal/prefill.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/multimodal/llama4/prefill.yaml
rename to recipes/llama4/trtllm/multimodal/prefill.yaml

From 1c5116458e6503e6c4bc309b979151cd4c74692b Mon Sep 17 00:00:00 2001
From: Anant Sharma
Date: Tue, 21 Oct 2025 07:18:41 -0700
Subject: [PATCH 2/8] fix multi node files

Signed-off-by: Anant Sharma
---
 .../trtllm/engine_configs/multimodal/agg.yaml | 29 +++++++++++++++++
 .../engine_configs/multimodal/decode.yaml     | 29 +++++++++++++++++
 .../engine_configs/multimodal/prefill.yaml    | 31 +++++++++++++++++++
 docs/backends/trtllm/multimodal_support.md    |  6 ++--
 .../multinode/trtllm}/srun_aggregated.sh      |  2 +-
 .../multinode/trtllm}/srun_disaggregated.sh   |  2 +-
 .../trtllm}/start_frontend_services.sh        |  0
 .../multinode/trtllm}/start_trtllm_worker.sh  |  0
 8 files changed, 94 insertions(+), 5 deletions(-)
 create mode 100644 components/backends/trtllm/engine_configs/multimodal/agg.yaml
 create mode 100644 components/backends/trtllm/engine_configs/multimodal/decode.yaml
 create mode 100644 components/backends/trtllm/engine_configs/multimodal/prefill.yaml
 rename {components/backends/trtllm/multinode => examples/basics/multinode/trtllm}/srun_aggregated.sh (98%)
 rename {components/backends/trtllm/multinode => examples/basics/multinode/trtllm}/srun_disaggregated.sh (99%)
 rename examples/{multimodal/scripts => basics/multinode/trtllm}/start_frontend_services.sh (100%)
 rename examples/{multimodal/scripts => basics/multinode/trtllm}/start_trtllm_worker.sh (100%)

diff --git a/components/backends/trtllm/engine_configs/multimodal/agg.yaml b/components/backends/trtllm/engine_configs/multimodal/agg.yaml
new file mode 100644
index 0000000000..6dbd676ee4
--- /dev/null
+++ b/components/backends/trtllm/engine_configs/multimodal/agg.yaml
@@ -0,0 +1,29 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+tensor_parallel_size: 1
+moe_expert_parallel_size: 1
+enable_attention_dp: false
+max_num_tokens: 8192
+max_batch_size: 16
+trust_remote_code: true
+backend: pytorch
+enable_chunked_prefill: true
+disable_overlap_scheduler: false
+kv_cache_config:
+  free_gpu_memory_fraction: 0.30
+  enable_block_reuse: false
+
+cache_transceiver_config:
+  backend: DEFAULT
\ No newline at end of file
diff --git a/components/backends/trtllm/engine_configs/multimodal/decode.yaml b/components/backends/trtllm/engine_configs/multimodal/decode.yaml
new file mode 100644
index 0000000000..6dbd676ee4
--- /dev/null
+++ b/components/backends/trtllm/engine_configs/multimodal/decode.yaml
@@ -0,0 +1,29 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+tensor_parallel_size: 1
+moe_expert_parallel_size: 1
+enable_attention_dp: false
+max_num_tokens: 8192
+max_batch_size: 16
+trust_remote_code: true
+backend: pytorch
+enable_chunked_prefill: true
+disable_overlap_scheduler: false
+kv_cache_config:
+  free_gpu_memory_fraction: 0.30
+  enable_block_reuse: false
+
+cache_transceiver_config:
+  backend: DEFAULT
\ No newline at end of file
diff --git a/components/backends/trtllm/engine_configs/multimodal/prefill.yaml b/components/backends/trtllm/engine_configs/multimodal/prefill.yaml
new file mode 100644
index 0000000000..83a65e8bf3
--- /dev/null
+++ b/components/backends/trtllm/engine_configs/multimodal/prefill.yaml
@@ -0,0 +1,31 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+tensor_parallel_size: 1
+moe_expert_parallel_size: 1
+enable_attention_dp: false
+max_num_tokens: 8192
+max_batch_size: 16
+trust_remote_code: true
+backend: pytorch
+enable_chunked_prefill: true
+# Overlap scheduler not currently supported in prefill only workers.
+disable_overlap_scheduler: true
+
+kv_cache_config:
+  free_gpu_memory_fraction: 0.30
+  enable_block_reuse: false
+
+cache_transceiver_config:
+  backend: DEFAULT
\ No newline at end of file
diff --git a/docs/backends/trtllm/multimodal_support.md b/docs/backends/trtllm/multimodal_support.md
index 3e6fffc353..a8cb246f41 100644
--- a/docs/backends/trtllm/multimodal_support.md
+++ b/docs/backends/trtllm/multimodal_support.md
@@ -27,7 +27,7 @@ Here are quick steps to launch Llama-4 Maverick BF16 in aggregated mode
 ```bash
 cd $DYNAMO_HOME/components/backends/trtllm
-export AGG_ENGINE_ARGS=./recipes/llama4/trtllm/multimodal/agg.yaml
+export AGG_ENGINE_ARGS=./engine_configs/multinode/agg.yaml
 export SERVED_MODEL_NAME="meta-llama/Llama-4-Maverick-17B-128E-Instruct"
 export MODEL_PATH="meta-llama/Llama-4-Maverick-17B-128E-Instruct"
 ./launch/agg.sh
@@ -80,8 +80,8 @@ cd $DYNAMO_HOME/components/backends/trtllm
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen2-VL-7B-Instruct"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen2-VL-7B-Instruct"}
 export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"}
-export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"recipes/llama4/trtllm/multimodal/prefill.yaml"}
-export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"recipes/llama4/trtllm/multimodal/decode.yaml"}
+export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/multimodal/prefill.yaml"}
+export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/multimodal/decode.yaml"}
 export MODALITY=${MODALITY:-"multimodal"}
 ./launch/disagg.sh
diff --git a/components/backends/trtllm/multinode/srun_aggregated.sh b/examples/basics/multinode/trtllm/srun_aggregated.sh
similarity index 98%
rename from components/backends/trtllm/multinode/srun_aggregated.sh
rename to examples/basics/multinode/trtllm/srun_aggregated.sh
index e9568db9cd..d1645e522b 100755
--- a/components/backends/trtllm/multinode/srun_aggregated.sh
+++ b/examples/basics/multinode/trtllm/srun_aggregated.sh
@@ -10,7 +10,7 @@ IMAGE="${IMAGE:-""}"
 # but you may freely customize the mounts based on your cluster. A common practice
 # is to mount paths to NFS storage for common scripts, model weights, etc.
 # NOTE: This can be a comma separated list of multiple mounts as well.
-DEFAULT_MOUNT="${PWD}/../:/mnt"
+DEFAULT_MOUNT="${PWD}/../../../:/mnt"
 MOUNTS="${MOUNTS:-${DEFAULT_MOUNT}}"
 
 # Example values, assuming 4 nodes with 4 GPUs on each node, such as 4xGB200 nodes.
diff --git a/components/backends/trtllm/multinode/srun_disaggregated.sh b/examples/basics/multinode/trtllm/srun_disaggregated.sh
similarity index 99%
rename from components/backends/trtllm/multinode/srun_disaggregated.sh
rename to examples/basics/multinode/trtllm/srun_disaggregated.sh
index fddac1ccbe..5c8e5bd755 100755
--- a/components/backends/trtllm/multinode/srun_disaggregated.sh
+++ b/examples/basics/multinode/trtllm/srun_disaggregated.sh
@@ -10,7 +10,7 @@ IMAGE="${IMAGE:-""}"
 # but you may freely customize the mounts based on your cluster. A common practice
 # is to mount paths to NFS storage for common scripts, model weights, etc.
 # NOTE: This can be a comma separated list of multiple mounts as well.
-DEFAULT_MOUNT="${PWD}/../:/mnt"
+DEFAULT_MOUNT="${PWD}/../../../:/mnt"
 MOUNTS="${MOUNTS:-${DEFAULT_MOUNT}}"
 
 NUM_GPUS_PER_NODE=${NUM_GPUS_PER_NODE:-4}
diff --git a/examples/multimodal/scripts/start_frontend_services.sh b/examples/basics/multinode/trtllm/start_frontend_services.sh
similarity index 100%
rename from examples/multimodal/scripts/start_frontend_services.sh
rename to examples/basics/multinode/trtllm/start_frontend_services.sh
diff --git a/examples/multimodal/scripts/start_trtllm_worker.sh b/examples/basics/multinode/trtllm/start_trtllm_worker.sh
similarity index 100%
rename from examples/multimodal/scripts/start_trtllm_worker.sh
rename to examples/basics/multinode/trtllm/start_trtllm_worker.sh

From d2fcf83ab07cb03b4223aefb15e7aef7f6f36107 Mon Sep 17 00:00:00 2001
From: Anant Sharma
Date: Tue, 21 Oct 2025 07:26:16 -0700
Subject: [PATCH 3/8] copy paste fix

Signed-off-by: Anant Sharma
---
 .../trtllm/engine_configs/multimodal/agg.yaml | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/components/backends/trtllm/engine_configs/multimodal/agg.yaml b/components/backends/trtllm/engine_configs/multimodal/agg.yaml
index 6dbd676ee4..24bc75601c 100644
--- a/components/backends/trtllm/engine_configs/multimodal/agg.yaml
+++ b/components/backends/trtllm/engine_configs/multimodal/agg.yaml
@@ -12,18 +12,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-tensor_parallel_size: 1
+tensor_parallel_size: 8
 moe_expert_parallel_size: 1
 enable_attention_dp: false
-max_num_tokens: 8192
-max_batch_size: 16
+max_num_tokens: 4096
+max_batch_size: 8
 trust_remote_code: true
 backend: pytorch
 enable_chunked_prefill: true
-disable_overlap_scheduler: false
+
 kv_cache_config:
-  free_gpu_memory_fraction: 0.30
+  free_gpu_memory_fraction: 0.3
   enable_block_reuse: false
 
 cache_transceiver_config:
-  backend: DEFAULT
\ No newline at end of file
+  backend: DEFAULT
+# NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603
+# NOTE: overlap_scheduler enabled by default since this commit and changed
+# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
+# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
\ No newline at end of file

From 126ccd4b6c5c86ba0410f87b47599b7a2cee8317 Mon Sep 17 00:00:00 2001
From: Anant Sharma
Date: Tue, 21 Oct 2025 07:26:57 -0700
Subject: [PATCH 4/8] fix

Signed-off-by: Anant Sharma
---
 components/backends/trtllm/engine_configs/multimodal/agg.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/backends/trtllm/engine_configs/multimodal/agg.yaml b/components/backends/trtllm/engine_configs/multimodal/agg.yaml
index 24bc75601c..754f8ce759 100644
--- a/components/backends/trtllm/engine_configs/multimodal/agg.yaml
+++ b/components/backends/trtllm/engine_configs/multimodal/agg.yaml
@@ -30,4 +30,4 @@ cache_transceiver_config:
 # NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603
 # NOTE: overlap_scheduler enabled by default since this commit and changed
 # config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
-# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428
\ No newline at end of file
+# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428

From b14ec2723a0198c9dc7bcd5c240d12a5b3afa2ce Mon Sep 17 00:00:00 2001
From: tanmayv25
Date: Wed, 22 Oct 2025 15:14:33 -0700
Subject: [PATCH 5/8] move remaining recipes

---
 benchmarks/router/run_engines.sh              | 10 +++---
 .../trtllm/deploy/agg-with-config.yaml        |  4 +--
 components/backends/trtllm/deploy/agg.yaml    |  4 +--
 .../backends/trtllm/deploy/agg_router.yaml    |  4 +--
 .../trtllm/deploy/disagg-multinode.yaml       | 12 +++----
 components/backends/trtllm/deploy/disagg.yaml |  8 ++---
 .../trtllm/deploy/disagg_planner.yaml         |  8 ++---
 .../backends/trtllm/deploy/disagg_router.yaml |  8 ++---
 components/backends/trtllm/launch/agg.sh      |  3 +-
 .../backends/trtllm/launch/agg_metrics.sh     |  3 +-
 .../backends/trtllm/launch/agg_router.sh      |  3 +-
 components/backends/trtllm/launch/disagg.sh   |  5 +--
 .../backends/trtllm/launch/disagg_router.sh   |  5 +--
 .../backends/trtllm/launch/epd_disagg.sh      |  7 ++--
 .../backends/trtllm/launch/gpt_oss_disagg.sh  |  5 +--
 docs/backends/trtllm/multimodal_support.md    | 10 +++---
 docs/kubernetes/README.md                     |  2 +-
 .../trtllm}/agg.yaml                          |  0
 .../trtllm}/decode.yaml                       |  0
 .../trtllm}/prefill.yaml                      |  0
 .../llama4/trtllm}/multimodal/agg.yaml        |  0
 recipes/qwen2-vl-7b-instruct/trtllm/agg.yaml  | 33 ++++++++++++++++++
 .../qwen2-vl-7b-instruct/trtllm}/decode.yaml  |  0
 .../qwen2-vl-7b-instruct/trtllm}/encode.yaml  |  0
 .../qwen2-vl-7b-instruct/trtllm}/prefill.yaml |  0
 recipes/qwen3/trtllm/agg.yaml                 | 34 +++++++++++++++++++
 recipes/qwen3/trtllm/decode.yaml              | 31 +++++++++++++++++
 recipes/qwen3/trtllm/prefill.yaml             | 30 ++++++++++++++++
 28 files changed, 183 insertions(+), 46 deletions(-)
 rename {components/backends/trtllm/engine_configs => recipes/deepseek-r1-distill-llama-8b/trtllm}/agg.yaml (100%)
 rename {components/backends/trtllm/engine_configs => recipes/deepseek-r1-distill-llama-8b/trtllm}/decode.yaml (100%)
 rename {components/backends/trtllm/engine_configs => recipes/deepseek-r1-distill-llama-8b/trtllm}/prefill.yaml (100%)
 rename {components/backends/trtllm/engine_configs => recipes/llama4/trtllm}/multimodal/agg.yaml (100%)
 create mode 100644 recipes/qwen2-vl-7b-instruct/trtllm/agg.yaml
 rename {components/backends/trtllm/engine_configs/multimodal => recipes/qwen2-vl-7b-instruct/trtllm}/decode.yaml (100%)
 rename {components/backends/trtllm/engine_configs => recipes/qwen2-vl-7b-instruct/trtllm}/encode.yaml (100%)
 rename {components/backends/trtllm/engine_configs/multimodal => recipes/qwen2-vl-7b-instruct/trtllm}/prefill.yaml (100%)
 create mode 100644 recipes/qwen3/trtllm/agg.yaml
 create mode 100644 recipes/qwen3/trtllm/decode.yaml
 create mode 100644 recipes/qwen3/trtllm/prefill.yaml

diff --git a/benchmarks/router/run_engines.sh b/benchmarks/router/run_engines.sh
index 18a97c8e28..2bda93c632 100755
--- a/benchmarks/router/run_engines.sh
+++ b/benchmarks/router/run_engines.sh
@@ -4,8 +4,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Parse command-line arguments
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 NUM_WORKERS=8
 MODEL_PATH="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+RECIPE_PATH="$DYNAMO_HOME/recipes/deepseek-r1-distill-llama-8b/trtllm"
 TENSOR_PARALLEL_SIZE=1
 DATA_PARALLEL_SIZE=1
 USE_MOCKERS=false
@@ -84,13 +86,13 @@ if [ ${#EXTRA_ARGS[@]} -eq 0 ]; then
     )
 elif [ "$USE_TRTLLM" = true ]; then
     # Default args for TensorRT-LLM engine using predefined YAML configs
-    # Config files located at: ../../components/backends/trtllm/engine_configs/{agg,decode,prefill}.yaml
+    # Config files located at: $RECIPE_PATH/{agg,decode,prefill}.yaml
     if [ "$MODE" = "prefill" ]; then
-        ENGINE_CONFIG="../../components/backends/trtllm/engine_configs/prefill.yaml"
+        ENGINE_CONFIG="$RECIPE_PATH/prefill.yaml"
     elif [ "$MODE" = "decode" ]; then
-        ENGINE_CONFIG="../../components/backends/trtllm/engine_configs/decode.yaml"
+        ENGINE_CONFIG="$RECIPE_PATH/decode.yaml"
     else
-        ENGINE_CONFIG="../../components/backends/trtllm/engine_configs/agg.yaml"
+        ENGINE_CONFIG="$RECIPE_PATH/agg.yaml"
     fi
 
     EXTRA_ARGS=(
diff --git a/components/backends/trtllm/deploy/agg-with-config.yaml b/components/backends/trtllm/deploy/agg-with-config.yaml
index dd15e56e65..e40ca48ada 100644
--- a/components/backends/trtllm/deploy/agg-with-config.yaml
+++ b/components/backends/trtllm/deploy/agg-with-config.yaml
@@ -55,7 +55,7 @@ spec:
   # mount the configmap as a volume
   volumeMounts:
   - name: nvidia-config
-    mountPath: /workspace/components/backends/trtllm/engine_configs
+    mountPath: /workspace/
     readOnly: true
   command:
   - python3
@@ -67,4 +67,4 @@ spec:
   - --served-model-name
   - Qwen/Qwen3-0.6B
   - --extra-engine-args
-  - engine_configs/agg.yaml
+  - ./recipes/qwen3/trtllm/agg.yaml
diff --git a/components/backends/trtllm/deploy/agg.yaml b/components/backends/trtllm/deploy/agg.yaml
index c7187673e4..54412576a2 100644
--- a/components/backends/trtllm/deploy/agg.yaml
+++ b/components/backends/trtllm/deploy/agg.yaml
@@ -25,7 +25,7 @@ spec:
   extraPodSpec:
     mainContainer:
       image: my-registry/trtllm-runtime:my-tag
-      workingDir: /workspace/components/backends/trtllm
+      workingDir: /workspace/
       command:
       - python3
      - -m
@@ -36,4 +36,4 @@ spec:
   - --served-model-name
   - Qwen/Qwen3-0.6B
   - --extra-engine-args
-  - engine_configs/agg.yaml
+  - ./recipes/qwen3/trtllm/agg.yaml
diff --git a/components/backends/trtllm/deploy/agg_router.yaml b/components/backends/trtllm/deploy/agg_router.yaml
index 787deb9847..ed42129fb4 100644
--- a/components/backends/trtllm/deploy/agg_router.yaml
+++ b/components/backends/trtllm/deploy/agg_router.yaml
@@ -28,7 +28,7 @@ spec:
   extraPodSpec:
     mainContainer:
      image: my-registry/trtllm-runtime:my-tag
-      workingDir: /workspace/components/backends/trtllm
+      workingDir: /workspace/
       command:
       - python3
      - -m
@@ -39,5 +39,5 @@ spec:
   - --served-model-name
   - Qwen/Qwen3-0.6B
   - --extra-engine-args
-  - engine_configs/agg.yaml
+  - ./recipes/qwen3/trtllm/agg.yaml
   - --publish-events-and-metrics
diff --git a/components/backends/trtllm/deploy/disagg-multinode.yaml b/components/backends/trtllm/deploy/disagg-multinode.yaml
index 3da492107a..2906cfd193 100644
--- a/components/backends/trtllm/deploy/disagg-multinode.yaml
+++ b/components/backends/trtllm/deploy/disagg-multinode.yaml
@@ -125,10 +125,10 @@ spec:
   mainContainer:
     volumeMounts:
     - name: nvidia-config
-      mountPath: /workspace/components/backends/trtllm/engine_configs
+      mountPath: /workspace/
      readOnly: true
     image: my-registry/trtllm-runtime:my-tag
-    workingDir: /workspace/components/backends/trtllm
+    workingDir: /workspace/
     command:
     - python3
    - -m
@@ -139,7 +139,7 @@ spec:
   - --served-model-name
   - Qwen/Qwen3-0.6B
   - --extra-engine-args
-  - engine_configs/prefill.yaml
+  - ./recipes/qwen3/trtllm/prefill.yaml
   - --disaggregation-mode
   - prefill
   - --disaggregation-strategy
@@ -165,10 +165,10 @@ spec:
   mainContainer:
     volumeMounts:
    - name: nvidia-config
-      mountPath: /workspace/components/backends/trtllm/engine_configs
+      mountPath: /workspace/
      readOnly: true
     image: my-registry/trtllm-runtime:my-tag
-    workingDir: /workspace/components/backends/trtllm
+    workingDir: /workspace/
     command:
     - python3
    - -m
@@ -179,7 +179,7 @@ spec:
   - --served-model-name
   - Qwen/Qwen3-0.6B
   - --extra-engine-args
-  - engine_configs/decode.yaml
+  - ./recipes/qwen3/trtllm/decode.yaml
   - --disaggregation-mode
   - decode
   - --disaggregation-strategy
diff --git a/components/backends/trtllm/deploy/disagg.yaml b/components/backends/trtllm/deploy/disagg.yaml
index 9055967dfe..501d2a4c20 100644
--- a/components/backends/trtllm/deploy/disagg.yaml
+++ b/components/backends/trtllm/deploy/disagg.yaml
@@ -26,7 +26,7 @@ spec:
   extraPodSpec:
     mainContainer:
       image: my-registry/trtllm-runtime:my-tag
-      workingDir: /workspace/components/backends/trtllm
+      workingDir: /workspace/
       command:
       - python3
      - -m
@@ -37,7 +37,7 @@ spec:
   - --served-model-name
   - Qwen/Qwen3-0.6B
   - --extra-engine-args
-  - engine_configs/prefill.yaml
+  - ./recipes/qwen3/trtllm/prefill.yaml
   - --disaggregation-mode
   - prefill
   - --disaggregation-strategy
@@ -54,7 +54,7 @@ spec:
   extraPodSpec:
     mainContainer:
       image: my-registry/trtllm-runtime:my-tag
-      workingDir: /workspace/components/backends/trtllm
+      workingDir: /workspace/
       command:
       - python3
      - -m
@@ -65,7 +65,7 @@ spec:
   - --served-model-name
   - Qwen/Qwen3-0.6B
   - --extra-engine-args
-  - engine_configs/decode.yaml
+  - ./recipes/qwen3/trtllm/decode.yaml
   - --disaggregation-mode
   - decode
   - --disaggregation-strategy
diff --git a/components/backends/trtllm/deploy/disagg_planner.yaml b/components/backends/trtllm/deploy/disagg_planner.yaml
index 09326e786d..5f0ef6b808 100644
--- a/components/backends/trtllm/deploy/disagg_planner.yaml
+++ b/components/backends/trtllm/deploy/disagg_planner.yaml
@@ -86,7 +86,7 @@ spec:
   terminationGracePeriodSeconds: 600
   mainContainer:
     image: my-registry/trtllm-runtime:my-tag
-    workingDir: /workspace/components/backends/trtllm
+    workingDir: /workspace/
     command:
     - python3
     args:
@@ -97,7 +97,7 @@
   - --served-model-name
   - Qwen/Qwen3-0.6B
   - --extra-engine-args
-  - engine_configs/decode.yaml
+  - ./recipes/qwen3/trtllm/decode.yaml
   - --disaggregation-mode
   - decode
   - --disaggregation-strategy
@@ -115,7 +115,7 @@
   terminationGracePeriodSeconds: 600
   mainContainer:
     image: my-registry/trtllm-runtime:my-tag
-    workingDir: /workspace/components/backends/trtllm
+    workingDir: /workspace/
     command:
     - python3
     args:
@@ -126,7 +126,7 @@
   - --served-model-name
   - Qwen/Qwen3-0.6B
   - --extra-engine-args
-  - engine_configs/prefill.yaml
+  - ./recipes/qwen3/trtllm/prefill.yaml
   - --disaggregation-mode
   - prefill
   - --disaggregation-strategy
diff --git a/components/backends/trtllm/deploy/disagg_router.yaml b/components/backends/trtllm/deploy/disagg_router.yaml
index 31fde39e05..f687354a95 100644
--- a/components/backends/trtllm/deploy/disagg_router.yaml
+++ b/components/backends/trtllm/deploy/disagg_router.yaml
@@ -28,7 +28,7 @@ spec:
   extraPodSpec:
     mainContainer:
       image: my-registry/trtllm-runtime:my-tag
-      workingDir: /workspace/components/backends/trtllm
+      workingDir: /workspace/
       command:
       - python3
      - -m
@@ -39,7 +39,7 @@ spec:
   - --served-model-name
   - Qwen/Qwen3-0.6B
   - --extra-engine-args
-  - engine_configs/prefill.yaml
+  - ./recipes/qwen3/trtllm/prefill.yaml
   - --disaggregation-mode
   - prefill
   - --disaggregation-strategy
@@ -56,7 +56,7 @@ spec:
   extraPodSpec:
     mainContainer:
       image: my-registry/trtllm-runtime:my-tag
-      workingDir: /workspace/components/backends/trtllm
+      workingDir: /workspace/
       command:
       - python3
      - -m
@@ -67,7 +67,7 @@ spec:
   - --served-model-name
   - Qwen/Qwen3-0.6B
   - --extra-engine-args
-  - engine_configs/decode.yaml
+  - ./recipes/qwen3/trtllm/decode.yaml
   - --disaggregation-mode
   - decode
   - --disaggregation-strategy
diff --git a/components/backends/trtllm/launch/agg.sh b/components/backends/trtllm/launch/agg.sh
index 5c7021c59c..f141531d7d 100755
--- a/components/backends/trtllm/launch/agg.sh
+++ b/components/backends/trtllm/launch/agg.sh
@@ -3,9 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
-export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"engine_configs/agg.yaml"}
+export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/agg.yaml"}
 export MODALITY=${MODALITY:-"text"}
 # If you want to use multimodal, set MODALITY to "multimodal"
 #export MODALITY=${MODALITY:-"multimodal"}
diff --git a/components/backends/trtllm/launch/agg_metrics.sh b/components/backends/trtllm/launch/agg_metrics.sh
index 3232576d76..ad01482a8c 100755
--- a/components/backends/trtllm/launch/agg_metrics.sh
+++ b/components/backends/trtllm/launch/agg_metrics.sh
@@ -3,9 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
-export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"engine_configs/agg.yaml"}
+export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/agg.yaml"}
 export MODALITY=${MODALITY:-"text"}
 
 # Setup cleanup trap
diff --git a/components/backends/trtllm/launch/agg_router.sh b/components/backends/trtllm/launch/agg_router.sh
index ca6d439e63..bb69762735 100755
--- a/components/backends/trtllm/launch/agg_router.sh
+++ b/components/backends/trtllm/launch/agg_router.sh
@@ -3,9 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
-export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"engine_configs/agg.yaml"}
+export AGG_ENGINE_ARGS=${AGG_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/agg.yaml"}
 
 # Setup cleanup trap
 cleanup() {
diff --git a/components/backends/trtllm/launch/disagg.sh b/components/backends/trtllm/launch/disagg.sh
index f89eba5c9e..a068c2979a 100755
--- a/components/backends/trtllm/launch/disagg.sh
+++ b/components/backends/trtllm/launch/disagg.sh
@@ -3,11 +3,12 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
 export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"}
-export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/prefill.yaml"}
-export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/decode.yaml"}
+export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/prefill.yaml"}
+export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/decode.yaml"}
 export PREFILL_CUDA_VISIBLE_DEVICES=${PREFILL_CUDA_VISIBLE_DEVICES:-"0"}
 export DECODE_CUDA_VISIBLE_DEVICES=${DECODE_CUDA_VISIBLE_DEVICES:-"1"}
 export MODALITY=${MODALITY:-"text"}
diff --git a/components/backends/trtllm/launch/disagg_router.sh b/components/backends/trtllm/launch/disagg_router.sh
index e29c851a56..7fdfee7746 100755
--- a/components/backends/trtllm/launch/disagg_router.sh
+++ b/components/backends/trtllm/launch/disagg_router.sh
@@ -3,11 +3,12 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen3-0.6B"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen3-0.6B"}
 export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"prefill_first"}
-export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/prefill.yaml"}
-export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/decode.yaml"}
+export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/prefill.yaml"}
+export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen3/trtllm/decode.yaml"}
 export PREFILL_CUDA_VISIBLE_DEVICES=${PREFILL_CUDA_VISIBLE_DEVICES:-"0"}
 export DECODE_CUDA_VISIBLE_DEVICES=${DECODE_CUDA_VISIBLE_DEVICES:-"1"}
diff --git a/components/backends/trtllm/launch/epd_disagg.sh b/components/backends/trtllm/launch/epd_disagg.sh
index 60cfa1c249..ebe2e42330 100755
--- a/components/backends/trtllm/launch/epd_disagg.sh
+++ b/components/backends/trtllm/launch/epd_disagg.sh
@@ -3,12 +3,13 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen2-VL-7B-Instruct"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen2-VL-7B-Instruct"}
 export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"}
-export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/prefill.yaml"}
-export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/decode.yaml"}
-export ENCODE_ENGINE_ARGS=${ENCODE_ENGINE_ARGS:-"engine_configs/encode.yaml"}
+export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen2-vl-7b-instruct/trtllm/prefill.yaml"}
+export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen2-vl-7b-instruct/trtllm/decode.yaml"}
+export ENCODE_ENGINE_ARGS=${ENCODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/qwen2-vl-7b-instruct/trtllm/encode.yaml"}
 export PREFILL_CUDA_VISIBLE_DEVICES=${PREFILL_CUDA_VISIBLE_DEVICES:-"0"}
 export DECODE_CUDA_VISIBLE_DEVICES=${DECODE_CUDA_VISIBLE_DEVICES:-"1"}
 export ENCODE_CUDA_VISIBLE_DEVICES=${ENCODE_CUDA_VISIBLE_DEVICES:-"2"}
diff --git a/components/backends/trtllm/launch/gpt_oss_disagg.sh b/components/backends/trtllm/launch/gpt_oss_disagg.sh
index 606ba2a8a2..931b505804 100755
--- a/components/backends/trtllm/launch/gpt_oss_disagg.sh
+++ b/components/backends/trtllm/launch/gpt_oss_disagg.sh
@@ -3,11 +3,12 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # Environment variables with defaults
+export DYNAMO_HOME=${DYNAMO_HOME:-"/workspace"}
 export MODEL_PATH=${MODEL_PATH:-"/model"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"openai/gpt-oss-120b"}
 export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"prefill_first"}
-export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml"}
-export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"recipes/gpt-oss-120b/trtllm/disagg/decode.yaml"}
+export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/gpt-oss-120b/trtllm/disagg/prefill.yaml"}
+export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"$DYNAMO_HOME/recipes/gpt-oss-120b/trtllm/disagg/decode.yaml"}
 
 set -e
 trap 'echo Cleaning up...; kill 0' EXIT
diff --git a/docs/backends/trtllm/multimodal_support.md b/docs/backends/trtllm/multimodal_support.md
index a8cb246f41..0a589840b7 100644
--- a/docs/backends/trtllm/multimodal_support.md
+++ b/docs/backends/trtllm/multimodal_support.md
@@ -25,9 +25,9 @@ Please note that you should provide **either image URLs or embedding file paths*
 Here are quick steps to launch Llama-4 Maverick BF16 in aggregated mode
 ```bash
-cd $DYNAMO_HOME/components/backends/trtllm
+cd $DYNAMO_HOME
 
-export AGG_ENGINE_ARGS=./engine_configs/multinode/agg.yaml
+export AGG_ENGINE_ARGS=./recipes/llama4/trtllm/multimodal/agg.yaml
 export SERVED_MODEL_NAME="meta-llama/Llama-4-Maverick-17B-128E-Instruct"
 export MODEL_PATH="meta-llama/Llama-4-Maverick-17B-128E-Instruct"
 ./launch/agg.sh
@@ -75,13 +75,13 @@ Here are quick steps to launch in disaggregated mode.
 The following is an example of launching a model in disaggregated mode. While this example uses `Qwen/Qwen2-VL-7B-Instruct`, you can adapt it for other models by modifying the environment variables for the model path and engine configurations.
 ```bash
-cd $DYNAMO_HOME/components/backends/trtllm
+cd $DYNAMO_HOME
 
 export MODEL_PATH=${MODEL_PATH:-"Qwen/Qwen2-VL-7B-Instruct"}
 export SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-"Qwen/Qwen2-VL-7B-Instruct"}
 export DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"}
-export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"engine_configs/multimodal/prefill.yaml"}
-export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"engine_configs/multimodal/decode.yaml"}
+export PREFILL_ENGINE_ARGS=${PREFILL_ENGINE_ARGS:-"recipes/qwen2-vl-7b-instruct/trtllm/prefill.yaml"}
+export DECODE_ENGINE_ARGS=${DECODE_ENGINE_ARGS:-"recipes/qwen2-vl-7b-instruct/trtllm/decode.yaml"}
 export MODALITY=${MODALITY:-"multimodal"}
 ./launch/disagg.sh
diff --git a/docs/kubernetes/README.md b/docs/kubernetes/README.md
index fb2408ba82..519796f901 100644
--- a/docs/kubernetes/README.md
+++ b/docs/kubernetes/README.md
@@ -182,7 +182,7 @@ args:
   - python3 -m dynamo.trtllm
     --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
     --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-    --extra-engine-args engine_configs/agg.yaml
+    --extra-engine-args $DYNAMO_HOME/recipes/deepseek-r1-distill-llama-8b/agg.yaml
 ```
 
 Key customization points include:
diff --git a/components/backends/trtllm/engine_configs/agg.yaml b/recipes/deepseek-r1-distill-llama-8b/trtllm/agg.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/agg.yaml
rename to recipes/deepseek-r1-distill-llama-8b/trtllm/agg.yaml
diff --git a/components/backends/trtllm/engine_configs/decode.yaml b/recipes/deepseek-r1-distill-llama-8b/trtllm/decode.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/decode.yaml
rename to recipes/deepseek-r1-distill-llama-8b/trtllm/decode.yaml
diff --git a/components/backends/trtllm/engine_configs/prefill.yaml b/recipes/deepseek-r1-distill-llama-8b/trtllm/prefill.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/prefill.yaml
rename to recipes/deepseek-r1-distill-llama-8b/trtllm/prefill.yaml
diff --git a/components/backends/trtllm/engine_configs/multimodal/agg.yaml b/recipes/llama4/trtllm/multimodal/agg.yaml
similarity index 100%
rename from components/backends/trtllm/engine_configs/multimodal/agg.yaml
rename to recipes/llama4/trtllm/multimodal/agg.yaml
diff --git a/recipes/qwen2-vl-7b-instruct/trtllm/agg.yaml b/recipes/qwen2-vl-7b-instruct/trtllm/agg.yaml
new file mode 100644
index 0000000000..754f8ce759
--- /dev/null
+++ b/recipes/qwen2-vl-7b-instruct/trtllm/agg.yaml
@@ -0,0 +1,33 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA
CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +tensor_parallel_size: 8 +moe_expert_parallel_size: 1 +enable_attention_dp: false +max_num_tokens: 4096 +max_batch_size: 8 +trust_remote_code: true +backend: pytorch +enable_chunked_prefill: true + +kv_cache_config: + free_gpu_memory_fraction: 0.3 + enable_block_reuse: false + +cache_transceiver_config: + backend: DEFAULT +# NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603 +# NOTE: overlap_scheduler enabled by default since this commit and changed +# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler': +# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428 diff --git a/components/backends/trtllm/engine_configs/multimodal/decode.yaml b/recipes/qwen2-vl-7b-instruct/trtllm/decode.yaml similarity index 100% rename from components/backends/trtllm/engine_configs/multimodal/decode.yaml rename to recipes/qwen2-vl-7b-instruct/trtllm/decode.yaml diff --git a/components/backends/trtllm/engine_configs/encode.yaml b/recipes/qwen2-vl-7b-instruct/trtllm/encode.yaml similarity index 100% rename from components/backends/trtllm/engine_configs/encode.yaml rename to recipes/qwen2-vl-7b-instruct/trtllm/encode.yaml diff --git a/components/backends/trtllm/engine_configs/multimodal/prefill.yaml b/recipes/qwen2-vl-7b-instruct/trtllm/prefill.yaml similarity index 100% rename from components/backends/trtllm/engine_configs/multimodal/prefill.yaml rename to recipes/qwen2-vl-7b-instruct/trtllm/prefill.yaml diff --git a/recipes/qwen3/trtllm/agg.yaml b/recipes/qwen3/trtllm/agg.yaml new file mode 100644 index 0000000000..53e0e6ce38 --- /dev/null +++ b/recipes/qwen3/trtllm/agg.yaml @@ -0,0 +1,34 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+tensor_parallel_size: 1 +moe_expert_parallel_size: 1 +enable_attention_dp: false +max_num_tokens: 8192 +max_batch_size: 16 +trust_remote_code: true +backend: pytorch +enable_chunked_prefill: true + +kv_cache_config: + free_gpu_memory_fraction: 0.85 + +# NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603 +# NOTE: overlap_scheduler enabled by default since this commit and changed +# config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler': +# https://github.com/NVIDIA/TensorRT-LLM/commit/b4e5df0ee0024eda3eeb83a6ba822245a30ab428 + + +cuda_graph_config: + max_batch_size: 16 \ No newline at end of file diff --git a/recipes/qwen3/trtllm/decode.yaml b/recipes/qwen3/trtllm/decode.yaml new file mode 100644 index 0000000000..a0154bb6e3 --- /dev/null +++ b/recipes/qwen3/trtllm/decode.yaml @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +tensor_parallel_size: 1 +moe_expert_parallel_size: 1 +enable_attention_dp: false +max_num_tokens: 8192 +trust_remote_code: true +backend: pytorch +enable_chunked_prefill: true +disable_overlap_scheduler: false + +cuda_graph_config: + max_batch_size: 16 + +kv_cache_config: + free_gpu_memory_fraction: 0.85 + +cache_transceiver_config: + backend: DEFAULT diff --git a/recipes/qwen3/trtllm/prefill.yaml b/recipes/qwen3/trtllm/prefill.yaml new file mode 100644 index 0000000000..4996c1fdc6 --- /dev/null +++ b/recipes/qwen3/trtllm/prefill.yaml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +tensor_parallel_size: 1 +moe_expert_parallel_size: 1 +enable_attention_dp: false +max_num_tokens: 8192 +trust_remote_code: true +backend: pytorch +enable_chunked_prefill: true +# Overlap scheduler not currently supported in prefill only workers. 
+disable_overlap_scheduler: true +cuda_graph_config: + max_batch_size: 16 +kv_cache_config: + free_gpu_memory_fraction: 0.85 + +cache_transceiver_config: + backend: DEFAULT \ No newline at end of file From c499173efd575d4d8dc56bf3581b75b771adea17 Mon Sep 17 00:00:00 2001 From: Anant Sharma Date: Thu, 23 Oct 2025 08:51:14 -0700 Subject: [PATCH 6/8] rabbit Signed-off-by: Anant Sharma --- .../trtllm/gemma3_sliding_window_attention.md | 12 ++++++------ examples/basics/multinode/trtllm/srun_aggregated.sh | 2 +- .../basics/multinode/trtllm/srun_disaggregated.sh | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/backends/trtllm/gemma3_sliding_window_attention.md b/docs/backends/trtllm/gemma3_sliding_window_attention.md index 5161332205..5226ad5338 100644 --- a/docs/backends/trtllm/gemma3_sliding_window_attention.md +++ b/docs/backends/trtllm/gemma3_sliding_window_attention.md @@ -30,7 +30,7 @@ VSWA is a mechanism in which a model’s layers alternate between multiple slidi cd $DYNAMO_HOME/components/backends/trtllm export MODEL_PATH=google/gemma-3-1b-it export SERVED_MODEL_NAME=$MODEL_PATH -export AGG_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_agg.yaml +export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml ./launch/agg.sh ``` @@ -39,7 +39,7 @@ export AGG_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_agg.yaml cd $DYNAMO_HOME/components/backends/trtllm export MODEL_PATH=google/gemma-3-1b-it export SERVED_MODEL_NAME=$MODEL_PATH -export AGG_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_agg.yaml +export AGG_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_agg.yaml ./launch/agg_router.sh ``` @@ -48,8 +48,8 @@ export AGG_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_agg.yaml cd $DYNAMO_HOME/components/backends/trtllm export MODEL_PATH=google/gemma-3-1b-it export SERVED_MODEL_NAME=$MODEL_PATH -export PREFILL_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_prefill.yaml -export DECODE_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_decode.yaml +export PREFILL_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_prefill.yaml +export DECODE_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_decode.yaml ./launch/disagg.sh ``` @@ -58,7 +58,7 @@ export DECODE_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_decode.yaml cd $DYNAMO_HOME/components/backends/trtllm export MODEL_PATH=google/gemma-3-1b-it export SERVED_MODEL_NAME=$MODEL_PATH -export PREFILL_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_prefill.yaml -export DECODE_ENGINE_ARGS=recipes/gemma3/trtllm/vswa_decode.yaml +export PREFILL_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_prefill.yaml +export DECODE_ENGINE_ARGS=$DYNAMO_HOME/recipes/gemma3/trtllm/vswa_decode.yaml ./launch/disagg_router.sh ``` diff --git a/examples/basics/multinode/trtllm/srun_aggregated.sh b/examples/basics/multinode/trtllm/srun_aggregated.sh index d1645e522b..18f41160d4 100755 --- a/examples/basics/multinode/trtllm/srun_aggregated.sh +++ b/examples/basics/multinode/trtllm/srun_aggregated.sh @@ -10,7 +10,7 @@ IMAGE="${IMAGE:-""}" # but you may freely customize the mounts based on your cluster. A common practice # is to mount paths to NFS storage for common scripts, model weights, etc. # NOTE: This can be a comma separated list of multiple mounts as well. -DEFAULT_MOUNT="${PWD}/../../../:/mnt" +DEFAULT_MOUNT="${PWD}/../../../../:/mnt" MOUNTS="${MOUNTS:-${DEFAULT_MOUNT}}" # Example values, assuming 4 nodes with 4 GPUs on each node, such as 4xGB200 nodes. 
diff --git a/examples/basics/multinode/trtllm/srun_disaggregated.sh b/examples/basics/multinode/trtllm/srun_disaggregated.sh index 5c8e5bd755..9d55784084 100755 --- a/examples/basics/multinode/trtllm/srun_disaggregated.sh +++ b/examples/basics/multinode/trtllm/srun_disaggregated.sh @@ -10,7 +10,7 @@ IMAGE="${IMAGE:-""}" # but you may freely customize the mounts based on your cluster. A common practice # is to mount paths to NFS storage for common scripts, model weights, etc. # NOTE: This can be a comma separated list of multiple mounts as well. -DEFAULT_MOUNT="${PWD}/../../../:/mnt" +DEFAULT_MOUNT="${PWD}/../../../../:/mnt" MOUNTS="${MOUNTS:-${DEFAULT_MOUNT}}" NUM_GPUS_PER_NODE=${NUM_GPUS_PER_NODE:-4} From d9d40d280ee8caf1d462c03cc77344523dda3096 Mon Sep 17 00:00:00 2001 From: Anant Sharma Date: Thu, 23 Oct 2025 09:01:56 -0700 Subject: [PATCH 7/8] more rabbit Signed-off-by: Anant Sharma --- docs/kubernetes/README.md | 2 +- examples/basics/multinode/trtllm/srun_aggregated.sh | 4 ++-- examples/basics/multinode/trtllm/srun_disaggregated.sh | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/kubernetes/README.md b/docs/kubernetes/README.md index 46dea4ad80..844c3858dd 100644 --- a/docs/kubernetes/README.md +++ b/docs/kubernetes/README.md @@ -203,7 +203,7 @@ args: - python3 -m dynamo.trtllm --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B - --extra-engine-args $DYNAMO_HOME/recipes/deepseek-r1-distill-llama-8b/agg.yaml + --extra-engine-args /workspace/recipes/deepseek-r1-distill-llama-8b/agg.yaml ``` Key customization points include: diff --git a/examples/basics/multinode/trtllm/srun_aggregated.sh b/examples/basics/multinode/trtllm/srun_aggregated.sh index 18f41160d4..46044c9265 100755 --- a/examples/basics/multinode/trtllm/srun_aggregated.sh +++ b/examples/basics/multinode/trtllm/srun_aggregated.sh @@ -51,7 +51,7 @@ srun \ --nodelist "${HEAD_NODE}" \ --nodes 1 \ --jobid "${SLURM_JOB_ID}" \ - /mnt/examples/multimodal/scripts/start_frontend_services.sh & + /mnt/examples/basics/multinode/trtllm/start_frontend_services.sh & # NOTE: Output streamed to stdout for ease of understanding the example, but # in practice you would probably set `srun --output ... --error ...` to pipe @@ -71,4 +71,4 @@ srun \ --nodes "${NUM_NODES}" \ --ntasks-per-node "${NUM_GPUS_PER_NODE}" \ --jobid "${SLURM_JOB_ID}" \ - /mnt/examples/multimodal/scripts/start_trtllm_worker.sh & \ No newline at end of file + /mnt/examples/basics/multinode/trtllm/start_trtllm_worker.sh & \ No newline at end of file diff --git a/examples/basics/multinode/trtllm/srun_disaggregated.sh b/examples/basics/multinode/trtllm/srun_disaggregated.sh index 9d55784084..8b6aabf3b6 100755 --- a/examples/basics/multinode/trtllm/srun_disaggregated.sh +++ b/examples/basics/multinode/trtllm/srun_disaggregated.sh @@ -56,7 +56,7 @@ srun \ --nodelist "${HEAD_NODE}" \ --nodes 1 \ --jobid "${SLURM_JOB_ID}" \ - /mnt/examples/multimodal/scripts/start_frontend_services.sh & + /mnt/examples/basics/multinode/trtllm/start_frontend_services.sh & # NOTE: Output streamed to stdout for ease of understanding the example, but # in practice you would probably set `srun --output ... 
--error ...` to pipe @@ -78,7 +78,7 @@ for ((i=1; i<=${NUM_PREFILL_WORKERS}; i++)); do --nodes "${NUM_PREFILL_NODES}" \ --ntasks-per-node "${NUM_GPUS_PER_NODE}" \ --jobid "${SLURM_JOB_ID}" \ - /mnt/examples/multimodal/scripts/start_trtllm_worker.sh & + /mnt/examples/basics/multinode/trtllm/start_trtllm_worker.sh & done for ((i=1; i<=${NUM_DECODE_WORKERS}; i++)); do @@ -98,5 +98,5 @@ for ((i=1; i<=${NUM_DECODE_WORKERS}; i++)); do --nodes "${NUM_DECODE_NODES}" \ --ntasks-per-node "${NUM_GPUS_PER_NODE}" \ --jobid "${SLURM_JOB_ID}" \ - /mnt/examples/multimodal/scripts/start_trtllm_worker.sh & + /mnt/examples/basics/multinode/trtllm/start_trtllm_worker.sh & done \ No newline at end of file From 42bd2dfc231ff0c2e303b462ab5de028d3940ff6 Mon Sep 17 00:00:00 2001 From: Anant Sharma Date: Fri, 24 Oct 2025 08:52:43 -0700 Subject: [PATCH 8/8] add recipes to docker Signed-off-by: Anant Sharma --- container/Dockerfile.trtllm | 1 + 1 file changed, 1 insertion(+) diff --git a/container/Dockerfile.trtllm b/container/Dockerfile.trtllm index 58af47f354..2334813543 100644 --- a/container/Dockerfile.trtllm +++ b/container/Dockerfile.trtllm @@ -272,6 +272,7 @@ COPY examples /workspace/examples COPY benchmarks /workspace/benchmarks COPY deploy /workspace/deploy COPY components/ /workspace/components/ +COPY recipes/ /workspace/recipes/ # Copy attribution files COPY ATTRIBUTION* LICENSE /workspace/