ai-dynamo · saturley-hall · Aug 5, 2025 · Jul 30, 2025 · Aug 4, 2025 · Aug 4, 2025
diff --git a/README.md b/README.md
@@ -27,6 +27,10 @@ limitations under the License.
 
 High-throughput, low-latency inference framework designed for serving generative AI and reasoning models in multi-node distributed environments.
 
+## Latest News
+
+* [08/05] Deploy `openai/gpt-oss-120b` with disaggregated serving on NVIDIA Blackwell GPUs using Dynamo [➡️ link](./components/backends/trtllm/gpt-oss.md)
+
 ## The Era of Multi-GPU, Multi-Node
 
 <p align="center">

diff --git a/components/backends/trtllm/engine_configs/gpt_oss/decode.yaml b/components/backends/trtllm/engine_configs/gpt_oss/decode.yaml
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+enable_attention_dp: true  # differs from the sibling prefill.yaml, which sets false
+disable_overlap_scheduler: false  # differs from the sibling prefill.yaml, which sets true
+moe_config:
+  backend: CUTLASS
+cuda_graph_config:
+  max_batch_size: 128
+  enable_padding: true
+cache_transceiver_config:  # NOTE(review): presumably KV-cache transfer settings for disaggregated serving; confirm
+  backend: ucx
+  max_tokens_in_buffer: 65536  # NOTE(review): same value as prefill.yaml; presumably must stay in sync, confirm
+print_iter_log: false
+stream_interval: 10
diff --git a/components/backends/trtllm/engine_configs/gpt_oss/prefill.yaml b/components/backends/trtllm/engine_configs/gpt_oss/prefill.yaml
@@ -0,0 +1,27 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+enable_attention_dp: false  # differs from the sibling decode.yaml, which sets true
+disable_overlap_scheduler: true  # differs from the sibling decode.yaml, which sets false
+moe_config:
+  backend: CUTLASS
+enable_chunked_prefill: true  # set only in this prefill config; decode.yaml has no such key
+cuda_graph_config:
+  max_batch_size: 32
+  enable_padding: true
+cache_transceiver_config:  # NOTE(review): presumably KV-cache transfer settings for disaggregated serving; confirm
+  backend: ucx
+  max_tokens_in_buffer: 65536  # NOTE(review): same value as decode.yaml; presumably must stay in sync, confirm
+print_iter_log: false
+stream_interval: 10