Skip to content
2 changes: 1 addition & 1 deletion components/backends/sglang/deploy/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ Edit the template to match your environment:

```yaml
# Update image registry and tag
image: your-registry/sglang-runtime:your-tag
image: my-registry/sglang-runtime:my-tag

# Configure your model
args:
Expand Down
2 changes: 1 addition & 1 deletion components/backends/trtllm/deploy/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ Edit the template to match your environment:

```yaml
# Update image registry and tag
image: your-registry/trtllm-runtime:your-tag
image: my-registry/trtllm-runtime:my-tag

# Configure your model and deployment settings
args:
Expand Down
4 changes: 2 additions & 2 deletions components/backends/trtllm/deploy/agg-with-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
TRTLLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg
Expand All @@ -50,7 +50,7 @@ spec:
configMap:
name: nvidia-config
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
# mount the configmap as a volume
volumeMounts:
Expand Down
2 changes: 1 addition & 1 deletion components/backends/vllm/deploy/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ Edit the template to match your environment:

```yaml
# Update image registry and tag
image: your-registry/vllm-runtime:your-tag
image: my-registry/vllm-runtime:my-tag

# Configure your model
args:
Expand Down
4 changes: 2 additions & 2 deletions components/backends/vllm/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg
Expand All @@ -24,7 +24,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
Expand Down
4 changes: 2 additions & 2 deletions components/backends/vllm/deploy/agg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
envs:
- name: DYN_ROUTER_MODE
value: kv
Expand All @@ -27,7 +27,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
Expand Down
6 changes: 3 additions & 3 deletions components/backends/vllm/deploy/disagg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
Expand All @@ -24,7 +24,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
Expand All @@ -41,7 +41,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
Expand Down
10 changes: 5 additions & 5 deletions components/backends/vllm/deploy/disagg_planner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
Planner:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
Expand Down Expand Up @@ -53,7 +53,7 @@ spec:
mountPoint: /data
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/planner/src/dynamo/planner
command:
- /bin/sh
Expand Down Expand Up @@ -93,7 +93,7 @@ spec:
failureThreshold: 10
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
Expand All @@ -116,7 +116,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- python3
Expand All @@ -141,7 +141,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- python3
Expand Down
6 changes: 3 additions & 3 deletions components/backends/vllm/deploy/disagg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
envs:
- name: DYN_ROUTER_MODE
value: kv
Expand All @@ -27,7 +27,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
Expand All @@ -44,7 +44,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
Expand Down
2 changes: 1 addition & 1 deletion deploy/inference-gateway/helm/dynamo-gaie/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ eppAware:
# Container name for the sidecar
name: frontend-router
# Sidecar image
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
# Image pull policy for the sidecar
imagePullPolicy: IfNotPresent
# Command and args for running the frontend in router mode.
Expand Down
4 changes: 2 additions & 2 deletions docs/_includes/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Install a pre-built wheel from PyPI.
source venv/bin/activate

# Install Dynamo from PyPI (choose one backend extra)
uv pip install "ai-dynamo[sglang]==0.4.1" # or [vllm], [trtllm]
uv pip install "ai-dynamo[sglang]==my-tag" # or [vllm], [trtllm]; replace my-tag with a released version number (e.g. 0.4.1)


Pip from source
Expand Down Expand Up @@ -41,4 +41,4 @@ Pull and run prebuilt images from NVIDIA NGC (`nvcr.io`).
docker run --rm -it \
--gpus all \
--network host \
nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.4.1 # or vllm, tensorrtllm
nvcr.io/nvidia/ai-dynamo/sglang-runtime:my-tag # or vllm, tensorrtllm
2 changes: 1 addition & 1 deletion docs/benchmarks/pre_deployment_profiling.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ spec:

1. **Set the container image:**
```bash
export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag (TODO: update to 0.5.0 upon release as profiling with 0.4.1 is broken)
export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag # replace my-tag with an existing image tag
```

2. **Set the config path for the profiling job:**
Expand Down
4 changes: 2 additions & 2 deletions recipes/llama-3-70b/vllm/agg/deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
mountPoint: /root/.cache/huggingface
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
VllmPrefillWorker:
Expand All @@ -36,7 +36,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
resources:
Expand Down
2 changes: 1 addition & 1 deletion recipes/llama-3-70b/vllm/agg/perf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
restartPolicy: Never
containers:
- name: perf
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
Expand Down
6 changes: 3 additions & 3 deletions recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
mountPoint: /root/.cache/huggingface
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
VllmPrefillWorker:
Expand All @@ -36,7 +36,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
resources:
Expand All @@ -61,7 +61,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
resources:
Expand Down
2 changes: 1 addition & 1 deletion recipes/llama-3-70b/vllm/disagg-multi-node/perf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
restartPolicy: Never
containers:
- name: perf
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
Expand Down
6 changes: 3 additions & 3 deletions recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
mountPoint: /root/.cache/huggingface
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
VllmPrefillWorker:
Expand Down Expand Up @@ -46,7 +46,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 2
resources:
Expand Down Expand Up @@ -81,7 +81,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
resources:
Expand Down
2 changes: 1 addition & 1 deletion recipes/llama-3-70b/vllm/disagg-single-node/perf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
restartPolicy: Never
containers:
- name: perf
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
Expand Down
Loading