Skip to content
5 changes: 5 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ The benchmarking framework supports:
- Customizable concurrency levels (configurable via CONCURRENCIES env var), sequence lengths, and models
- Automated performance plot generation with custom labels

**Sequential GPU Usage:**
- Models are deployed and benchmarked **sequentially**, not in parallel
- Each deployment gets exclusive access to all available GPUs during its benchmark run
- This ensures accurate performance measurements and a fair comparison across configurations

**Supported Backends:**
- DynamoGraphDeployments
- External HTTP endpoints (for comparison with non-Dynamo backends)
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ DYNAMO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Configuration - all set via command line arguments
NAMESPACE=""
MODEL="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
MODEL="Qwen/Qwen3-0.6B"
ISL=2000
STD=10
OSL=256
Expand Down Expand Up @@ -46,7 +46,7 @@ REQUIRED:

OPTIONS:
-h, --help Show this help message
-m, --model MODEL Model name for GenAI-Perf configuration and logging (default: deepseek-ai/DeepSeek-R1-Distill-Llama-8B)
-m, --model MODEL Model name for GenAI-Perf configuration and logging (default: Qwen/Qwen3-0.6B)
NOTE: This must match the model configured in your deployment manifests and the model deployed in any endpoints.
-i, --isl LENGTH Input sequence length (default: $ISL)
-s, --std STDDEV Input sequence standard deviation (default: $STD)
Expand Down
11 changes: 3 additions & 8 deletions benchmarks/profiler/deploy/profile_sla_job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ spec:
command: ["python", "-m", "benchmarks.profiler.profile_sla"]
args:
- --config
- /workspace/configs/disagg.yaml
- /data/configs/disagg.yaml
- --output-dir
- /workspace/profiling_results
- /data/profiling_results
- --namespace
- ${NAMESPACE}
- --backend
Expand All @@ -50,15 +50,10 @@ spec:
- "20"
volumeMounts:
- name: output-volume
mountPath: /workspace/profiling_results
- name: configs
mountPath: /workspace/configs
mountPath: /data
restartPolicy: Never
volumes:
- name: output-volume
persistentVolumeClaim:
claimName: dynamo-pvc
- name: configs
persistentVolumeClaim:
claimName: dynamo-pvc
backoffLimit: 0
2 changes: 2 additions & 0 deletions benchmarks/profiler/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
6 changes: 3 additions & 3 deletions benchmarks/utils/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,17 @@ def main() -> int:
help="Input in format <label>=<manifest_path_or_endpoint>. Can be specified multiple times for comparisons.",
)
parser.add_argument("--namespace", required=True, help="Kubernetes namespace")
parser.add_argument("--isl", type=int, default=200, help="Input sequence length")
parser.add_argument("--isl", type=int, default=2000, help="Input sequence length")
parser.add_argument(
"--std",
type=int,
default=10,
help="Input sequence standard deviation",
)
parser.add_argument("--osl", type=int, default=200, help="Output sequence length")
parser.add_argument("--osl", type=int, default=256, help="Output sequence length")
parser.add_argument(
"--model",
default="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
default="Qwen/Qwen3-0.6B",
help="Model name",
)
parser.add_argument(
Expand Down
4 changes: 2 additions & 2 deletions components/backends/sglang/deploy/disagg_planner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ spec:
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /workspace/profiling_results
mountPoint: /data/profiling_results
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
Expand All @@ -62,7 +62,7 @@ spec:
--environment=kubernetes
--backend=sglang
--adjustment-interval=60
--profile-results-dir=/workspace/profiling_results
--profile-results-dir=/data/profiling_results
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
dynamoNamespace: dynamo
componentType: frontend
Expand Down
4 changes: 2 additions & 2 deletions components/backends/vllm/deploy/disagg_planner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ spec:
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /workspace/profiling_results
mountPoint: /data/profiling_results
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
Expand All @@ -62,7 +62,7 @@ spec:
--environment=kubernetes
--backend=vllm
--adjustment-interval=60
--profile-results-dir=/workspace/profiling_results
--profile-results-dir=/data/profiling_results
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
dynamoNamespace: vllm-disagg-planner
componentType: frontend
Expand Down
21 changes: 17 additions & 4 deletions deploy/utils/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,23 +88,36 @@ These scripts interact with the Persistent Volume Claim (PVC) that stores config

```bash
# The profiling job reads your DGD config from the PVC
python3 deploy/utils/inject_manifest.py \
# IMPORTANT: All paths must start with /data/ for security reasons
python3 -m deploy.utils.inject_manifest \
--namespace $NAMESPACE \
--src ./my-disagg.yaml \
--dest /configs/disagg.yaml
--dest /data/configs/disagg.yaml
```

**Download benchmark/profiling results:**

```bash
# After benchmarking or profiling completes, download results
python3 deploy/utils/download_pvc_results.py \
python3 -m deploy.utils.download_pvc_results \
--namespace $NAMESPACE \
--output-dir ./pvc_files \
--folder /results \
--folder /data/results \
--no-config # optional: skip *.yaml/*.yml in the download
```

#### Path Requirements

**Important**: The PVC is mounted at `/data` in the access pod for security reasons. Every PVC path passed to these scripts (`--dest` for `inject_manifest`, `--folder` for `download_pvc_results`) must start with `/data/`.

**Common path patterns:**
- `/data/configs/` - Configuration files (DGD manifests)
- `/data/results/` - Benchmark results
- `/data/profiling_results/` - Profiling data
- `/data/benchmarking/` - Benchmarking artifacts

**User-friendly error messages**: If you omit the `/data/` prefix, both scripts exit with an error message that explains how to fix the path; `inject_manifest` additionally prints the corrected path and an example command you can copy.

#### Next Steps

For complete benchmarking workflows:
Expand Down
21 changes: 16 additions & 5 deletions deploy/utils/download_pvc_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

Usage:
python3 download_pvc_results.py --namespace <namespace> --output-dir <local_directory> \
--folder </absolute/folder/in/pvc> [--no-config]
--folder /data/<folder/in/pvc> [--no-config]
"""

import argparse
Expand All @@ -36,7 +36,7 @@
from deploy.utils.kubernetes import (
check_kubectl_access,
cleanup_access_pod,
deploy_access_pod,
ensure_clean_access_pod,
run_command,
)
except ModuleNotFoundError:
Expand All @@ -46,7 +46,7 @@
from deploy.utils.kubernetes import (
check_kubectl_access,
cleanup_access_pod,
deploy_access_pod,
ensure_clean_access_pod,
run_command,
)

Expand Down Expand Up @@ -182,19 +182,30 @@ def main():
parser.add_argument(
"--folder",
required=True,
help="Absolute folder path in the PVC to download, e.g. /profiling_results or /benchmarking_results",
help="Absolute folder path in the PVC to download, must start with /data/, e.g. /data/profiling_results or /data/benchmarking_results",
)

args = parser.parse_args()

# Validate folder path starts with /data/
if not args.folder.startswith("/data/"):
print("❌ Error: Folder path must start with '/data/'")
print(f" Provided: {args.folder}")
print(" Quick Fix: Add '/data/' prefix to your path")
print(" Examples:")
print(" /profiling_results → /data/profiling_results")
print(" /benchmarking_results → /data/benchmarking_results")
print(" /configs → /data/configs")
sys.exit(1)

print("📥 PVC Results Download")
print("=" * 40)

# Validate inputs
check_kubectl_access(args.namespace)

# Deploy access pod
pod_name = deploy_access_pod(args.namespace)
pod_name = ensure_clean_access_pod(args.namespace)
try:
# List and download files
files = list_pvc_contents(args.namespace, pod_name, args.folder, args.no_config)
Expand Down
46 changes: 36 additions & 10 deletions deploy/utils/inject_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@
Copies any Kubernetes manifest file into the PVC for later use by jobs.
Both the source manifest path and destination path in the PVC are required.

IMPORTANT: The PVC is mounted at /data in the access pod for security reasons.
All destination paths must start with '/data/'.

Usage:
python3 inject_manifest.py --namespace <namespace> --src <local_manifest.yaml> --dest /data/<path_in_pvc>

Examples:
python3 inject_manifest.py --namespace <ns> --src ./my-disagg.yaml --dest /configs/disagg.yaml
python3 inject_manifest.py --namespace <ns> --src ./my-agg.yaml --dest /configs/agg.yaml
python3 inject_manifest.py --namespace <ns> --src ./disagg.yaml --dest /data/configs/disagg.yaml
python3 inject_manifest.py --namespace <ns> --src ./my-data.yaml --dest /data/custom/path/data.yaml
"""

import argparse
Expand All @@ -37,7 +40,7 @@
PVC_ACCESS_POD_NAME,
check_kubectl_access,
cleanup_access_pod,
deploy_access_pod,
ensure_clean_access_pod,
run_command,
)

Expand Down Expand Up @@ -100,16 +103,39 @@ def main():
parser.add_argument(
"--dest",
required=True,
help="Absolute target path in PVC (e.g., /profiling_results/agg.yaml)",
help="Absolute target path in PVC (must start with /data/, e.g., /data/configs/agg.yaml)",
)

args = parser.parse_args()

# Validate target_path to prevent directory traversal
if not args.dest.startswith("/"):
print(
"ERROR: Target path must be an absolute path inside the PVC (start with '/')."
)
# Validate target_path to prevent directory traversal and ensure it's within PVC
if not args.dest.startswith("/data/"):
print("=" * 60)
print("❌ ERROR: Invalid target path")
print("=" * 60)
print("The PVC is mounted at /data in the access pod.")
print("All paths must start with '/data/' for security reasons.")
print("")
print("💡 QUICK FIX:")
if args.dest.startswith("/"):
# Suggest the fix
suggested_path = f"/data{args.dest}"
print(f" Change: {args.dest}")
print(f" To: {suggested_path}")
print("")
print("📝 Example commands:")
print(" python3 -m deploy.utils.inject_manifest \\")
print(f" --namespace {args.namespace} \\")
print(f" --src {args.src} \\")
print(f" --dest {suggested_path}")
else:
print(f" Use: /data/{args.dest.lstrip('/')}")
print("")
print("🔍 Common patterns:")
print(" /configs/file.yaml → /data/configs/file.yaml")
print(" /results/data.yaml → /data/results/data.yaml")
print(" /profiling_results/... → /data/profiling_results/...")
print("=" * 60)
sys.exit(1)

if ".." in args.dest:
Expand All @@ -123,7 +149,7 @@ def main():
check_kubectl_access(args.namespace)

# Deploy access pod
deploy_access_pod(args.namespace)
ensure_clean_access_pod(args.namespace)
try:
# Copy manifest
copy_manifest(args.namespace, args.src, args.dest)
Expand Down
Loading
Loading