ai-dynamo · saturley-hall · Sep 24, 2025 · Sep 16, 2025 · Sep 24, 2025 · Sep 24, 2025
diff --git a/components/backends/sglang/deploy/README.md b/components/backends/sglang/deploy/README.md
@@ -92,7 +92,7 @@ Edit the template to match your environment:
 
 ```yaml
 # Update image registry and tag
-image: your-registry/sglang-runtime:your-tag
+image: my-registry/sglang-runtime:my-tag
 
 # Configure your model
 args:

diff --git a/components/backends/trtllm/deploy/README.md b/components/backends/trtllm/deploy/README.md
@@ -141,7 +141,7 @@ Edit the template to match your environment:
 
 ```yaml
 # Update image registry and tag
-image: your-registry/trtllm-runtime:your-tag
+image: my-registry/trtllm-runtime:my-tag
 
 # Configure your model and deployment settings
 args:

diff --git a/components/backends/trtllm/deploy/agg-with-config.yaml b/components/backends/trtllm/deploy/agg-with-config.yaml
@@ -34,7 +34,7 @@ spec:
       replicas: 1
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
     TRTLLMWorker:
       envFromSecret: hf-token-secret
       dynamoNamespace: trtllm-agg
@@ -50,7 +50,7 @@ spec:
           configMap:
             name: nvidia-config
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
           workingDir: /workspace/components/backends/trtllm
           # mount the configmap as a volume
           volumeMounts:

diff --git a/components/backends/vllm/deploy/README.md b/components/backends/vllm/deploy/README.md
@@ -116,7 +116,7 @@ Edit the template to match your environment:
 
 ```yaml
 # Update image registry and tag
-image: your-registry/vllm-runtime:your-tag
+image: my-registry/vllm-runtime:my-tag
 
 # Configure your model
 args:

diff --git a/components/backends/vllm/deploy/agg.yaml b/components/backends/vllm/deploy/agg.yaml
@@ -13,7 +13,7 @@ spec:
       replicas: 1
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
     VllmDecodeWorker:
       envFromSecret: hf-token-secret
       dynamoNamespace: vllm-agg
@@ -24,7 +24,7 @@ spec:
           gpu: "1"
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
           command:
             - /bin/sh

diff --git a/components/backends/vllm/deploy/agg_router.yaml b/components/backends/vllm/deploy/agg_router.yaml
@@ -13,7 +13,7 @@ spec:
       replicas: 1
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
       envs:
         - name: DYN_ROUTER_MODE
           value: kv
@@ -27,7 +27,7 @@ spec:
           gpu: "1"
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
           command:
             - /bin/sh

diff --git a/components/backends/vllm/deploy/disagg.yaml b/components/backends/vllm/deploy/disagg.yaml
@@ -13,7 +13,7 @@ spec:
       replicas: 1
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
     VllmDecodeWorker:
       dynamoNamespace: vllm-disagg
       envFromSecret: hf-token-secret
@@ -24,7 +24,7 @@ spec:
           gpu: "1"
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
           command:
             - /bin/sh
@@ -41,7 +41,7 @@ spec:
           gpu: "1"
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
           command:
             - /bin/sh

diff --git a/components/backends/vllm/deploy/disagg_planner.yaml b/components/backends/vllm/deploy/disagg_planner.yaml
@@ -22,7 +22,7 @@ spec:
       replicas: 1
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
     Planner:
       dynamoNamespace: vllm-disagg-planner
       envFromSecret: hf-token-secret
@@ -53,7 +53,7 @@ spec:
         mountPoint: /data
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/planner/src/dynamo/planner
           command:
             - /bin/sh
@@ -93,7 +93,7 @@ spec:
         failureThreshold: 10
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
           command:
             - /bin/sh
@@ -116,7 +116,7 @@ spec:
               port: 9090
             periodSeconds: 10
             failureThreshold: 60
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
           command:
             - python3
@@ -141,7 +141,7 @@ spec:
               port: 9090
             periodSeconds: 10
             failureThreshold: 60
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
           command:
             - python3

diff --git a/components/backends/vllm/deploy/disagg_router.yaml b/components/backends/vllm/deploy/disagg_router.yaml
@@ -13,7 +13,7 @@ spec:
       replicas: 1
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
       envs:
         - name: DYN_ROUTER_MODE
           value: kv
@@ -27,7 +27,7 @@ spec:
           gpu: "1"
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
           command:
             - /bin/sh
@@ -44,7 +44,7 @@ spec:
           gpu: "1"
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
           command:
             - /bin/sh

@@ -73,7 +73,7 @@ eppAware:
     # Container name for the sidecar
     name: frontend-router
     # Sidecar image
-    image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+    image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
     # Image pull policy for the sidecar
     imagePullPolicy: IfNotPresent
     # Command and args for running the frontend in router mode.

diff --git a/docs/_includes/install.rst b/docs/_includes/install.rst
@@ -10,7 +10,7 @@ Install a pre-built wheel from PyPI.
    source venv/bin/activate
 
    # Install Dynamo from PyPI (choose one backend extra)
-   uv pip install "ai-dynamo[sglang]==0.4.1"  # or [vllm], [trtllm]
+   uv pip install "ai-dynamo[sglang]==my-tag"  # or [vllm], [trtllm]; replace my-tag with a released version
 
 
 Pip from source
@@ -41,4 +41,4 @@ Pull and run prebuilt images from NVIDIA NGC (`nvcr.io`).
    docker run --rm -it \
      --gpus all \
      --network host \
-     nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.4.1  # or vllm, tensorrtllm
+     nvcr.io/nvidia/ai-dynamo/sglang-runtime:my-tag  # or vllm, tensorrtllm
diff --git a/docs/benchmarks/pre_deployment_profiling.md b/docs/benchmarks/pre_deployment_profiling.md
@@ -151,7 +151,7 @@ spec:
 
 1. **Set the container image:**
    ```bash
-   export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag (TODO: update to 0.5.0 upon release as profiling with 0.4.1 is broken)
+   export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag  # replace my-tag with an existing image tag
    ```
 
 2. **Set the config path for the profiling job:**

@@ -16,7 +16,7 @@ spec:
         mountPoint: /root/.cache/huggingface
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
       replicas: 1
     VllmPrefillWorker:
@@ -36,7 +36,7 @@ spec:
           command:
           - /bin/sh
           - -c
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
       replicas: 1
       resources:

@@ -16,7 +16,7 @@ spec:
       restartPolicy: Never
       containers:
       - name: perf
-        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
         workingDir: /workspace/components/backends/vllm
         command:
         - /bin/sh

@@ -16,7 +16,7 @@ spec:
         mountPoint: /root/.cache/huggingface
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
       replicas: 1
     VllmPrefillWorker:
@@ -36,7 +36,7 @@ spec:
           command:
           - /bin/sh
           - -c
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
       replicas: 1
       resources:
@@ -61,7 +61,7 @@ spec:
           command:
           - /bin/sh
           - -c
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
       replicas: 1
       resources:

@@ -16,7 +16,7 @@ spec:
       restartPolicy: Never
       containers:
       - name: perf
-        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
         workingDir: /workspace/components/backends/vllm
         command:
         - /bin/sh

@@ -16,7 +16,7 @@ spec:
         mountPoint: /root/.cache/huggingface
       extraPodSpec:
         mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
       replicas: 1
     VllmPrefillWorker:
@@ -46,7 +46,7 @@ spec:
           command:
           - /bin/sh
           - -c
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
       replicas: 2
       resources:
@@ -81,7 +81,7 @@ spec:
           command:
           - /bin/sh
           - -c
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
           workingDir: /workspace/components/backends/vllm
       replicas: 1
       resources:

@@ -16,7 +16,7 @@ spec:
       restartPolicy: Never
       containers:
       - name: perf
-        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
         workingDir: /workspace/components/backends/vllm
         command:
         - /bin/sh