diff --git a/README.md b/README.md
index 11e21d50504..e5ef6954feb 100644
--- a/README.md
+++ b/README.md
@@ -199,7 +199,7 @@ It is recommended to use [NGC PyTorch Container](https://catalog.ngc.nvidia.com/
 
 > [!Note]
 > Ensure that you select a PyTorch container image version that matches the version of TensorRT-LLM you are using.
-> For example, if you are using `tensorrt-llm==1.0.0rc6`, use the PyTorch container image version `25.06`.
+> For example, if you are using `tensorrt-llm==1.1.0rc3`, use the PyTorch container image version `25.06`.
 > To find the correct PyTorch container version for your desired `tensorrt-llm` release, visit the [TensorRT-LLM Dockerfile.multi](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docker/Dockerfile.multi) on GitHub. Switch to the branch that matches your `tensorrt-llm` version, and look for the `BASE_TAG` line to identify the recommended PyTorch container tag.
 
 > [!Important]
diff --git a/components/backends/trtllm/engine_configs/decode.yaml b/components/backends/trtllm/engine_configs/decode.yaml
index 3cf5476e1ed..a0154bb6e31 100644
--- a/components/backends/trtllm/engine_configs/decode.yaml
+++ b/components/backends/trtllm/engine_configs/decode.yaml
@@ -28,4 +28,4 @@ kv_cache_config:
   free_gpu_memory_fraction: 0.85
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_decode.yaml b/components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_decode.yaml
index 59b9aabe984..8f0bd83919b 100644
--- a/components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_decode.yaml
+++ b/components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_decode.yaml
@@ -54,4 +54,4 @@ cuda_graph_config:
 print_iter_log: true
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_prefill.yaml b/components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_prefill.yaml
index f44bcac1417..46494e8d68d 100644
--- a/components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_prefill.yaml
+++ b/components/backends/trtllm/engine_configs/deepseek_r1/mtp/mtp_prefill.yaml
@@ -38,4 +38,4 @@ speculative_config:
   num_nextn_predict_layers: 1
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/simple/decode.yaml b/components/backends/trtllm/engine_configs/deepseek_r1/simple/decode.yaml
index 73e193c146a..28f246574be 100644
--- a/components/backends/trtllm/engine_configs/deepseek_r1/simple/decode.yaml
+++ b/components/backends/trtllm/engine_configs/deepseek_r1/simple/decode.yaml
@@ -57,4 +57,4 @@ cuda_graph_config:
 print_iter_log: true
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/simple/prefill.yaml b/components/backends/trtllm/engine_configs/deepseek_r1/simple/prefill.yaml
index 3d6d4d35740..13b2410a672 100644
--- a/components/backends/trtllm/engine_configs/deepseek_r1/simple/prefill.yaml
+++ b/components/backends/trtllm/engine_configs/deepseek_r1/simple/prefill.yaml
@@ -36,4 +36,4 @@ disable_overlap_scheduler: true
 print_iter_log: true
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml b/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml
index 652cf82250a..8f953c6472b 100644
--- a/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml
+++ b/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml
@@ -63,4 +63,4 @@ cuda_graph_config:
 print_iter_log: true
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml b/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml
index 4f7aabe6824..8a756cc32b0 100644
--- a/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml
+++ b/components/backends/trtllm/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml
@@ -41,4 +41,4 @@ disable_overlap_scheduler: true
 print_iter_log: true
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/encode.yaml b/components/backends/trtllm/engine_configs/encode.yaml
index 5ac1f884108..6f0c20990f5 100644
--- a/components/backends/trtllm/engine_configs/encode.yaml
+++ b/components/backends/trtllm/engine_configs/encode.yaml
@@ -27,4 +27,4 @@ kv_cache_config:
   free_gpu_memory_fraction: 0.85
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml b/components/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml
index f417ed6f0aa..c3ea683857d 100644
--- a/components/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml
+++ b/components/backends/trtllm/engine_configs/gemma3/vswa_decode.yaml
@@ -26,4 +26,4 @@ kv_cache_config:
     - 32768
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml b/components/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml
index cd36bfa31a3..663d241b580 100644
--- a/components/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml
+++ b/components/backends/trtllm/engine_configs/gemma3/vswa_prefill.yaml
@@ -27,4 +27,4 @@ kv_cache_config:
     - 32768
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/gpt_oss/decode.yaml b/components/backends/trtllm/engine_configs/gpt_oss/decode.yaml
index e3703824c14..1ba98445458 100644
--- a/components/backends/trtllm/engine_configs/gpt_oss/decode.yaml
+++ b/components/backends/trtllm/engine_configs/gpt_oss/decode.yaml
@@ -19,7 +19,7 @@ moe_config:
 cuda_graph_config:
     enable_padding: true
 cache_transceiver_config:
-  backend: ucx
+  backend: UCX
   max_tokens_in_buffer: 65536
 print_iter_log: false
 stream_interval: 10
diff --git a/components/backends/trtllm/engine_configs/gpt_oss/prefill.yaml b/components/backends/trtllm/engine_configs/gpt_oss/prefill.yaml
index 07d979a6fd1..87bab09fd48 100644
--- a/components/backends/trtllm/engine_configs/gpt_oss/prefill.yaml
+++ b/components/backends/trtllm/engine_configs/gpt_oss/prefill.yaml
@@ -21,7 +21,7 @@ cuda_graph_config:
     max_batch_size: 32
     enable_padding: true
 cache_transceiver_config:
-  backend: ucx
+  backend: UCX
   max_tokens_in_buffer: 65536
 print_iter_log: false
 stream_interval: 10
diff --git a/components/backends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml b/components/backends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml
index 171df484d8f..019cac5ac64 100644
--- a/components/backends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml
+++ b/components/backends/trtllm/engine_configs/llama4/eagle/eagle_decode.yaml
@@ -49,4 +49,4 @@ cuda_graph_config:
 print_iter_log: true
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml b/components/backends/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml
index ce3059f0b49..5b978deece1 100644
--- a/components/backends/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml
+++ b/components/backends/trtllm/engine_configs/llama4/eagle/eagle_prefill.yaml
@@ -34,4 +34,4 @@ kv_cache_config:
   enable_block_reuse: false
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/multimodal/agg.yaml b/components/backends/trtllm/engine_configs/multimodal/agg.yaml
index a2b90336792..754f8ce759d 100644
--- a/components/backends/trtllm/engine_configs/multimodal/agg.yaml
+++ b/components/backends/trtllm/engine_configs/multimodal/agg.yaml
@@ -26,7 +26,7 @@ kv_cache_config:
   enable_block_reuse: false
 
 cache_transceiver_config:
-  backend: default
+  backend: DEFAULT
 # NOTE: pytorch_backend_config section flattened since: https://github.com/NVIDIA/TensorRT-LLM/pull/4603
 # NOTE: overlap_scheduler enabled by default since this commit and changed
 # config field from 'enable_overlap_scheduler' to 'disable_overlap_scheduler':
diff --git a/components/backends/trtllm/engine_configs/multimodal/decode.yaml b/components/backends/trtllm/engine_configs/multimodal/decode.yaml
index bd90c0b62ee..6dbd676ee44 100644
--- a/components/backends/trtllm/engine_configs/multimodal/decode.yaml
+++ b/components/backends/trtllm/engine_configs/multimodal/decode.yaml
@@ -26,4 +26,4 @@ kv_cache_config:
   enable_block_reuse: false
 
 cache_transceiver_config:
-  backend: default
\ No newline at end of file
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/multimodal/llama4/decode.yaml b/components/backends/trtllm/engine_configs/multimodal/llama4/decode.yaml
index e94d3ee0d80..262a2be1cc9 100644
--- a/components/backends/trtllm/engine_configs/multimodal/llama4/decode.yaml
+++ b/components/backends/trtllm/engine_configs/multimodal/llama4/decode.yaml
@@ -26,4 +26,4 @@ kv_cache_config:
   enable_block_reuse: false
 
 cache_transceiver_config:
-  backend: default
\ No newline at end of file
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/multimodal/llama4/prefill.yaml b/components/backends/trtllm/engine_configs/multimodal/llama4/prefill.yaml
index 23f54130c47..3d2c1440156 100644
--- a/components/backends/trtllm/engine_configs/multimodal/llama4/prefill.yaml
+++ b/components/backends/trtllm/engine_configs/multimodal/llama4/prefill.yaml
@@ -28,4 +28,4 @@ kv_cache_config:
   enable_block_reuse: false
 
 cache_transceiver_config:
-  backend: default
\ No newline at end of file
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/multimodal/prefill.yaml b/components/backends/trtllm/engine_configs/multimodal/prefill.yaml
index d3ad035541b..83a65e8bf30 100644
--- a/components/backends/trtllm/engine_configs/multimodal/prefill.yaml
+++ b/components/backends/trtllm/engine_configs/multimodal/prefill.yaml
@@ -28,4 +28,4 @@ kv_cache_config:
   enable_block_reuse: false
 
 cache_transceiver_config:
-  backend: default
\ No newline at end of file
+  backend: DEFAULT
diff --git a/components/backends/trtllm/engine_configs/prefill.yaml b/components/backends/trtllm/engine_configs/prefill.yaml
index a7b8d3aaa01..4996c1fdc61 100644
--- a/components/backends/trtllm/engine_configs/prefill.yaml
+++ b/components/backends/trtllm/engine_configs/prefill.yaml
@@ -27,4 +27,4 @@ kv_cache_config:
   free_gpu_memory_fraction: 0.85
 
 cache_transceiver_config:
-  backend: default
\ No newline at end of file
+  backend: DEFAULT
diff --git a/components/backends/trtllm/multimodal_support.md b/components/backends/trtllm/multimodal_support.md
index 25fbf7130c3..5fb29038a46 100644
--- a/components/backends/trtllm/multimodal_support.md
+++ b/components/backends/trtllm/multimodal_support.md
@@ -14,24 +14,6 @@ limitations under the License.
 
 # Multimodal Support
 
-> [!Important]
-> There are some known issues in tensorrt_llm==1.0.0rc6 version for multimodal support
-> It is important to rebuild the dynamo container with a specific version of tensorrt_llm
-> commit to use multimodal feature.
-## Build Container
-
-```bash
-./container/build.sh --framework trtllm --tensorrtllm-commit b4065d8ca64a64eee9fdc64b39cb66d73d4be47c
-```
-
-## Run Container
-
-```bash
-./container/run.sh --framework trtllm -it
-```
-
-## Usage Guide
-
 TRTLLM supports multimodal models with dynamo. You can provide multimodal inputs in the following ways:
 
 - By sending image URLs
diff --git a/components/backends/trtllm/src/dynamo/trtllm/main.py b/components/backends/trtllm/src/dynamo/trtllm/main.py
index d8b35eb5f5b..53b5a73e554 100644
--- a/components/backends/trtllm/src/dynamo/trtllm/main.py
+++ b/components/backends/trtllm/src/dynamo/trtllm/main.py
@@ -8,7 +8,6 @@
 import sys
 
 import uvloop
-from tensorrt_llm import SamplingParams
 from tensorrt_llm.llmapi import (
     BuildConfig,
     CapacitySchedulerPolicy,
@@ -16,6 +15,7 @@
     KvCacheConfig,
     SchedulerConfig,
 )
+from tensorrt_llm.llmapi.llm import SamplingParams
 from tensorrt_llm.llmapi.llm_utils import update_llm_args_with_extra_options
 from tensorrt_llm.llmapi.tokenizer import tokenizer_factory
 from torch.cuda import device_count
diff --git a/components/backends/trtllm/src/dynamo/trtllm/request_handlers/handler_base.py b/components/backends/trtllm/src/dynamo/trtllm/request_handlers/handler_base.py
index 79f3d559209..ae9893b5396 100644
--- a/components/backends/trtllm/src/dynamo/trtllm/request_handlers/handler_base.py
+++ b/components/backends/trtllm/src/dynamo/trtllm/request_handlers/handler_base.py
@@ -21,8 +21,8 @@
 from typing import Optional, Union
 
 import torch
-from tensorrt_llm import SamplingParams
 from tensorrt_llm.llmapi import DisaggregatedParams as LlmDisaggregatedParams
+from tensorrt_llm.llmapi.llm import SamplingParams
 
 from dynamo.logits_processing.examples import HelloWorldLogitsProcessor
 from dynamo.nixl_connect import Connector
diff --git a/container/Dockerfile b/container/Dockerfile
index eb296eeaa7b..3308c077bc7 100644
--- a/container/Dockerfile
+++ b/container/Dockerfile
@@ -1,3 +1,4 @@
+# syntax=docker/dockerfile:1.10.0
 # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
@@ -35,8 +36,6 @@ ARG ARCH_ALT=x86_64
 ARG USE_SCCACHE
 ARG SCCACHE_BUCKET=""
 ARG SCCACHE_REGION=""
-ARG AWS_ACCESS_KEY_ID=""
-ARG AWS_SECRET_ACCESS_KEY=""
 
 # NIXL configuration
 ARG NIXL_UCX_REF=v1.19.0
@@ -58,8 +57,6 @@ ARG PYTHON_VERSION
 ARG USE_SCCACHE
 ARG SCCACHE_BUCKET
 ARG SCCACHE_REGION
-ARG AWS_ACCESS_KEY_ID
-ARG AWS_SECRET_ACCESS_KEY
 ARG NIXL_UCX_REF
 ARG NIXL_REF
 
@@ -164,7 +161,9 @@ ENV PATH=/usr/local/bin/etcd/:$PATH
 ##################################
 
 # Build and install UCX
-RUN rm -rf /opt/hpcx/ucx && \
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    rm -rf /opt/hpcx/ucx && \
     rm -rf /usr/local/ucx && \
     echo "Building UCX with reference $NIXL_UCX_REF" && \
     cd /usr/local/src && \
@@ -214,7 +213,9 @@ ENV NIXL_SRC_DIR=/opt/nixl \
     NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
 
 # Build and install NIXL
-RUN git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
     cd ${NIXL_SRC_DIR} && \
     if [ "$ARCH" = "arm64" ]; then \
         nixl_build_args="-Ddisable_gds_backend=true"; \
@@ -230,7 +231,9 @@ RUN git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.
 
 # Build NIXL Python module
 # TODO OPS-590: Move gds_path selection based on arch into NIXL build and re-enable gds backend for arm64
-RUN if [ "$ARCH" = "arm64" ]; then \
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    if [ "$ARCH" = "arm64" ]; then \
         cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl \
         --config-settings=setup-args="-Ddisable_gds_backend=true"; \
     else \
@@ -272,8 +275,6 @@ ARG ENABLE_KVBM
 ARG USE_SCCACHE
 ARG SCCACHE_BUCKET
 ARG SCCACHE_REGION
-ARG AWS_ACCESS_KEY_ID
-ARG AWS_SECRET_ACCESS_KEY
 
 WORKDIR /opt/dynamo
 
@@ -317,7 +318,9 @@ COPY lib/ /opt/dynamo/lib/
 COPY components/ /opt/dynamo/components/
 
 # Build wheels
-RUN uv build --wheel --out-dir /opt/dynamo/dist && \
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    uv build --wheel --out-dir /opt/dynamo/dist && \
     cd /opt/dynamo/lib/bindings/python && \
     uv pip install maturin[patchelf] && \
     if [ "$ENABLE_KVBM" = "true" ]; then \
diff --git a/container/Dockerfile.trtllm b/container/Dockerfile.trtllm
index 6bf259e2a32..b8a1f5b0407 100644
--- a/container/Dockerfile.trtllm
+++ b/container/Dockerfile.trtllm
@@ -140,7 +140,6 @@ COPY --from=trtllm_wheel . /trtllm_wheel/
 # Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel
 # because there might be mismatched versions of TensorRT between the NGC PyTorch
 # and the TRTLLM wheel.
-# Locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc6
 RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
     pip uninstall -y tensorrt && \
     if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
@@ -148,9 +147,6 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
         WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \
         if [ -n "$WHEEL_FILE" ]; then \
             pip install "$WHEEL_FILE"; \
-            if [ "$ARCH" = "amd64" ]; then \
-                pip install "triton==3.3.1"; \
-            fi; \
         else \
             echo "No wheel file found in /trtllm_wheel directory."; \
             exit 1; \
@@ -158,9 +154,6 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
     else \
         # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
         pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
-        if [ "$ARCH" = "amd64" ]; then \
-            pip install "triton==3.3.1"; \
-        fi; \
     fi
 
 # Install test dependencies
@@ -477,12 +470,7 @@ COPY --from=dev /workspace/target/release/metrics /usr/local/bin/metrics
 # NOTE: If a package (tensorrt_llm) exists on both --index-url and --extra-index-url,
 # uv will prioritize the --extra-index-url, unless --index-strategy unsafe-best-match
 # is also specified. So set the configurable index as a --extra-index-url for prioritization.
-# NOTE: locking triton version to 3.3.1 as 3.4.0 breaks tensorrt-llm 1.0.0rc6
-# NOTE: locking cuda-python version to <13 to avoid breaks with tensorrt-llm 1.0.0rc6. This
-#       can be removed after https://github.com/NVIDIA/TensorRT-LLM/pull/6703 is merged
-#       we upgrade to a published pip wheel containing this change.
-RUN python3 -m pip install --no-cache-dir --break-system-packages "cuda-python>=12,<13" && \
-    python3 -m pip install --no-cache-dir --break-system-packages --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
+RUN python3 -m pip install --no-cache-dir --break-system-packages --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
     python3 -m pip install --no-cache-dir --break-system-packages \
         /workspace/wheelhouse/ai_dynamo_runtime*cp312*.whl \
         /workspace/wheelhouse/ai_dynamo*any.whl \
diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm
index 1503da9a275..9061c6607ff 100644
--- a/container/Dockerfile.vllm
+++ b/container/Dockerfile.vllm
@@ -1,3 +1,4 @@
+# syntax=docker/dockerfile:1.10.0
 # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
@@ -20,8 +21,6 @@ ARG TORCH_BACKEND="cu128"
 ARG USE_SCCACHE
 ARG SCCACHE_BUCKET=""
 ARG SCCACHE_REGION=""
-ARG AWS_ACCESS_KEY_ID=""
-ARG AWS_SECRET_ACCESS_KEY=""
 
 # Match 0.10.1.1 vLLM release
 # https://github.com/vllm-project/vllm/releases/tag/v0.10.1.1
@@ -121,8 +120,6 @@ ARG USE_SCCACHE
 ARG ARCH_ALT
 ARG SCCACHE_BUCKET
 ARG SCCACHE_REGION
-ARG AWS_ACCESS_KEY_ID=""
-ARG AWS_SECRET_ACCESS_KEY=""
 
 ENV ARCH_ALT=${ARCH_ALT}
 RUN if [ "$USE_SCCACHE" = "true" ]; then \
@@ -139,6 +136,8 @@ ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
 # Install VLLM and related dependencies
 RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
     --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
         # TODO - split vllm, DeepEP, DeepGeMM, PPLX installs
         # Should be able to select how you want your build to go
         cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
diff --git a/container/build.sh b/container/build.sh
index 08849b3b02d..a095db4c9aa 100755
--- a/container/build.sh
+++ b/container/build.sh
@@ -89,7 +89,7 @@ TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
 # TensorRT-LLM commit to use for building the trtllm wheel if not provided.
 # Important Note: This commit is not used in our CI pipeline. See the CI
 # variables to learn how to run a pipeline with a specific commit.
-DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="a16ba6445c61ed70e7aadfe787d6f316bb422652"
+DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="e81c50dbd2811ec858eccc2c71b5e7a330ff7e24"
 TRTLLM_COMMIT=""
 TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
 TRTLLM_GIT_URL=""
@@ -98,7 +98,7 @@ TRTLLM_GIT_URL=""
 TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
 # TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
 # Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
-DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.0.0rc6"
+DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.1.0rc3"
 TENSORRTLLM_PIP_WHEEL=""
 
 
@@ -602,8 +602,8 @@ if [ "$USE_SCCACHE" = true ]; then
     BUILD_ARGS+=" --build-arg USE_SCCACHE=true"
     BUILD_ARGS+=" --build-arg SCCACHE_BUCKET=${SCCACHE_BUCKET}"
     BUILD_ARGS+=" --build-arg SCCACHE_REGION=${SCCACHE_REGION}"
-    BUILD_ARGS+=" --build-arg AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}"
-    BUILD_ARGS+=" --build-arg AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}"
+    BUILD_ARGS+=" --secret id=aws-key-id,env=AWS_ACCESS_KEY_ID"
+    BUILD_ARGS+=" --secret id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY"
 fi
 
 LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
diff --git a/docs/support_matrix.md b/docs/support_matrix.md
index f6019c003aa..c6dc81858ef 100644
--- a/docs/support_matrix.md
+++ b/docs/support_matrix.md
@@ -67,7 +67,7 @@ If you are using a **GPU**, the following GPU models and architectures are suppo
 | **Build Dependency** | **Version**                                                                      |
 | :------------------- | :------------------------------------------------------------------------------- |
 | **Base Container**   | [25.03](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda-dl-base/tags) |
-| **TensorRT-LLM**     | 1.0.0rc6                                                                         |
+| **TensorRT-LLM**     | 1.1.0rc3                                                                         |
 | **NIXL**             | 0.4.1                                                                            |
 | **vLLM**             | 0.10.1.1                                                                         |
 | **SGLang**           | 0.5.0rc2                                                                         |
diff --git a/pyproject.toml b/pyproject.toml
index 091b2910577..f8878cf55a8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,8 +48,7 @@ Repository = "https://github.com/ai-dynamo/dynamo.git"
 [project.optional-dependencies]
 trtllm =[
     "uvloop",
-    "tensorrt-llm==1.0.0rc6",
-    "triton==3.3.1",  # locking triton as version 3.4.0 breaks tensorrt-llm 1.0.0rc6
+    "tensorrt-llm==1.1.0rc3",
 ]
 
 vllm = [