Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
feat: update benchmarking and deploy utils (#2933)
Signed-off-by: Hannah Zhang <[email protected]>
Signed-off-by: Harrison King Saturley-Hall <[email protected]>
  • Loading branch information
hhzhang16 authored and saturley-hall committed Sep 9, 2025
commit d9d82806472819614df18256fc3effe96594a3aa
5 changes: 5 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ The benchmarking framework supports:
- Customizable concurrency levels (configurable via CONCURRENCIES env var), sequence lengths, and models
- Automated performance plot generation with custom labels

**Sequential GPU Usage:**
- Models are deployed and benchmarked **sequentially**, not in parallel
- Each deployment gets exclusive access to all available GPUs during its benchmark run
- Ensures accurate performance measurements and fair comparison across configurations

**Supported Backends:**
- DynamoGraphDeployments
- External HTTP endpoints (for comparison with non-Dynamo backends)
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ DYNAMO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Configuration - all set via command line arguments
NAMESPACE=""
MODEL="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
MODEL="Qwen/Qwen3-0.6B"
ISL=2000
STD=10
OSL=256
Expand Down Expand Up @@ -46,7 +46,7 @@ REQUIRED:

OPTIONS:
-h, --help Show this help message
-m, --model MODEL Model name for GenAI-Perf configuration and logging (default: deepseek-ai/DeepSeek-R1-Distill-Llama-8B)
-m, --model MODEL Model name for GenAI-Perf configuration and logging (default: Qwen/Qwen3-0.6B)
NOTE: This must match the model configured in your deployment manifests and the model deployed in any endpoints.
-i, --isl LENGTH Input sequence length (default: $ISL)
-s, --std STDDEV Input sequence standard deviation (default: $STD)
Expand Down
11 changes: 3 additions & 8 deletions benchmarks/profiler/deploy/profile_sla_job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ spec:
command: ["python", "-m", "benchmarks.profiler.profile_sla"]
args:
- --config
- /workspace/configs/disagg.yaml
- /data/configs/disagg.yaml
- --output-dir
- /workspace/profiling_results
- /data/profiling_results
- --namespace
- ${NAMESPACE}
- --backend
Expand All @@ -50,15 +50,10 @@ spec:
- "20"
volumeMounts:
- name: output-volume
mountPath: /workspace/profiling_results
- name: configs
mountPath: /workspace/configs
mountPath: /data
restartPolicy: Never
volumes:
- name: output-volume
persistentVolumeClaim:
claimName: dynamo-pvc
- name: configs
persistentVolumeClaim:
claimName: dynamo-pvc
backoffLimit: 0
2 changes: 2 additions & 0 deletions benchmarks/profiler/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
6 changes: 3 additions & 3 deletions benchmarks/utils/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,17 @@ def main() -> int:
help="Input in format <label>=<manifest_path_or_endpoint>. Can be specified multiple times for comparisons.",
)
parser.add_argument("--namespace", required=True, help="Kubernetes namespace")
parser.add_argument("--isl", type=int, default=200, help="Input sequence length")
parser.add_argument("--isl", type=int, default=2000, help="Input sequence length")
parser.add_argument(
"--std",
type=int,
default=10,
help="Input sequence standard deviation",
)
parser.add_argument("--osl", type=int, default=200, help="Output sequence length")
parser.add_argument("--osl", type=int, default=256, help="Output sequence length")
parser.add_argument(
"--model",
default="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
default="Qwen/Qwen3-0.6B",
help="Model name",
)
parser.add_argument(
Expand Down
4 changes: 2 additions & 2 deletions components/backends/sglang/deploy/disagg_planner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ spec:
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /workspace/profiling_results
mountPoint: /data/profiling_results
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
Expand All @@ -62,7 +62,7 @@ spec:
--environment=kubernetes
--backend=sglang
--adjustment-interval=60
--profile-results-dir=/workspace/profiling_results
--profile-results-dir=/data/profiling_results
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
dynamoNamespace: dynamo
componentType: frontend
Expand Down
4 changes: 2 additions & 2 deletions components/backends/vllm/deploy/disagg_planner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ spec:
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /workspace/profiling_results
mountPoint: /data/profiling_results
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
Expand All @@ -65,7 +65,7 @@ spec:
--environment=kubernetes
--backend=vllm
--adjustment-interval=60
--profile-results-dir=/workspace/profiling_results
--profile-results-dir=/data/profiling_results
--prometheus-port=9085
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
dynamoNamespace: vllm-disagg-planner
Expand Down
21 changes: 17 additions & 4 deletions deploy/utils/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,23 +88,36 @@ These scripts interact with the Persistent Volume Claim (PVC) that stores config

```bash
# The profiling job reads your DGD config from the PVC
python3 deploy/utils/inject_manifest.py \
# IMPORTANT: The --dest path must start with /data/ (the PVC mount point in the access pod) for security reasons
python3 -m deploy.utils.inject_manifest \
--namespace $NAMESPACE \
--src ./my-disagg.yaml \
--dest /configs/disagg.yaml
--dest /data/configs/disagg.yaml
```

**Download benchmark/profiling results:**

```bash
# After benchmarking or profiling completes, download results
python3 deploy/utils/download_pvc_results.py \
python3 -m deploy.utils.download_pvc_results \
--namespace $NAMESPACE \
--output-dir ./pvc_files \
--folder /results \
--folder /data/results \
--no-config # optional: skip *.yaml/*.yml in the download
```

#### Path Requirements

**Important**: The PVC is mounted at `/data` in the access pod for security reasons. Every PVC path passed to these scripts (`--dest` for `inject_manifest`, `--folder` for `download_pvc_results`) must start with `/data/`.

**Common path patterns:**
- `/data/configs/` - Configuration files (DGD manifests)
- `/data/results/` - Benchmark results
- `/data/profiling_results/` - Profiling data
- `/data/benchmarking/` - Benchmarking artifacts

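**User-friendly error messages**: If you forget the `/data/` prefix, both scripts exit with an error that explains the requirement and lists example path conversions; `inject_manifest` additionally prints the corrected path and a ready-to-run example command.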

#### Next Steps

For complete benchmarking workflows:
Expand Down
21 changes: 16 additions & 5 deletions deploy/utils/download_pvc_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

Usage:
python3 download_pvc_results.py --namespace <namespace> --output-dir <local_directory> \
--folder </absolute/folder/in/pvc> [--no-config]
--folder /data/<folder/in/pvc> [--no-config]
"""

import argparse
Expand All @@ -36,7 +36,7 @@
from deploy.utils.kubernetes import (
check_kubectl_access,
cleanup_access_pod,
deploy_access_pod,
ensure_clean_access_pod,
run_command,
)
except ModuleNotFoundError:
Expand All @@ -46,7 +46,7 @@
from deploy.utils.kubernetes import (
check_kubectl_access,
cleanup_access_pod,
deploy_access_pod,
ensure_clean_access_pod,
run_command,
)

Expand Down Expand Up @@ -182,19 +182,30 @@ def main():
parser.add_argument(
"--folder",
required=True,
help="Absolute folder path in the PVC to download, e.g. /profiling_results or /benchmarking_results",
help="Absolute folder path in the PVC to download, must start with /data/, e.g. /data/profiling_results or /data/benchmarking_results",
)

args = parser.parse_args()

# Validate folder path starts with /data/
if not args.folder.startswith("/data/"):
print("❌ Error: Folder path must start with '/data/'")
print(f" Provided: {args.folder}")
print(" Quick Fix: Add '/data/' prefix to your path")
print(" Examples:")
print(" /profiling_results → /data/profiling_results")
print(" /benchmarking_results → /data/benchmarking_results")
print(" /configs → /data/configs")
sys.exit(1)

print("📥 PVC Results Download")
print("=" * 40)

# Validate inputs
check_kubectl_access(args.namespace)

# Deploy access pod
pod_name = deploy_access_pod(args.namespace)
pod_name = ensure_clean_access_pod(args.namespace)
try:
# List and download files
files = list_pvc_contents(args.namespace, pod_name, args.folder, args.no_config)
Expand Down
46 changes: 36 additions & 10 deletions deploy/utils/inject_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@
Copies any Kubernetes manifest file into the PVC for later use by jobs.
Both the source manifest path and destination path in the PVC are required.

IMPORTANT: The PVC is mounted at /data in the access pod for security reasons.
All destination paths must start with '/data/'.

Usage:
python3 inject_manifest.py --namespace <namespace> --src <local_manifest.yaml> --dest /data/<path/in/pvc>

Examples:
python3 inject_manifest.py --namespace <ns> --src ./my-disagg.yaml --dest /configs/disagg.yaml
python3 inject_manifest.py --namespace <ns> --src ./my-agg.yaml --dest /configs/agg.yaml
python3 inject_manifest.py --namespace <ns> --src ./disagg.yaml --dest /data/configs/disagg.yaml
python3 inject_manifest.py --namespace <ns> --src ./my-data.yaml --dest /data/custom/path/data.yaml
"""

import argparse
Expand All @@ -37,7 +40,7 @@
PVC_ACCESS_POD_NAME,
check_kubectl_access,
cleanup_access_pod,
deploy_access_pod,
ensure_clean_access_pod,
run_command,
)

Expand Down Expand Up @@ -100,16 +103,39 @@ def main():
parser.add_argument(
"--dest",
required=True,
help="Absolute target path in PVC (e.g., /profiling_results/agg.yaml)",
help="Absolute target path in PVC (must start with /data/, e.g., /data/configs/agg.yaml)",
)

args = parser.parse_args()

# Validate target_path to prevent directory traversal
if not args.dest.startswith("/"):
print(
"ERROR: Target path must be an absolute path inside the PVC (start with '/')."
)
# Validate --dest: it must live under the PVC mount point (/data/); paths containing ".." are rejected separately below
if not args.dest.startswith("/data/"):
print("=" * 60)
print("❌ ERROR: Invalid target path")
print("=" * 60)
print("The PVC is mounted at /data in the access pod.")
print("All paths must start with '/data/' for security reasons.")
print("")
print("💡 QUICK FIX:")
if args.dest.startswith("/"):
# Suggest the fix
suggested_path = f"/data{args.dest}"
print(f" Change: {args.dest}")
print(f" To: {suggested_path}")
print("")
print("📝 Example commands:")
print(" python3 -m deploy.utils.inject_manifest \\")
print(f" --namespace {args.namespace} \\")
print(f" --src {args.src} \\")
print(f" --dest {suggested_path}")
else:
print(f" Use: /data/{args.dest.lstrip('/')}")
print("")
print("🔍 Common patterns:")
print(" /configs/file.yaml → /data/configs/file.yaml")
print(" /results/data.yaml → /data/results/data.yaml")
print(" /profiling_results/... → /data/profiling_results/...")
print("=" * 60)
sys.exit(1)

if ".." in args.dest:
Expand All @@ -123,7 +149,7 @@ def main():
check_kubectl_access(args.namespace)

# Deploy access pod
deploy_access_pod(args.namespace)
ensure_clean_access_pod(args.namespace)
try:
# Copy manifest
copy_manifest(args.namespace, args.src, args.dest)
Expand Down
Loading
Loading