From d9d82806472819614df18256fc3effe96594a3aa Mon Sep 17 00:00:00 2001 From: hhzhang16 <54051230+hhzhang16@users.noreply.github.com> Date: Mon, 8 Sep 2025 19:53:55 -0700 Subject: [PATCH] feat: update benchmarking and deploy utils (#2933) Signed-off-by: Hannah Zhang Signed-off-by: Harrison King Saturley-Hall --- benchmarks/README.md | 5 + benchmarks/benchmark.sh | 4 +- .../profiler/deploy/profile_sla_job.yaml | 11 +-- benchmarks/profiler/utils/__init__.py | 2 + benchmarks/utils/benchmark.py | 6 +- .../sglang/deploy/disagg_planner.yaml | 4 +- .../backends/vllm/deploy/disagg_planner.yaml | 4 +- deploy/utils/README.md | 21 +++- deploy/utils/download_pvc_results.py | 21 +++- deploy/utils/inject_manifest.py | 46 +++++++-- deploy/utils/kubernetes.py | 97 ++++++++++++++----- deploy/utils/manifests/pvc-access-pod.yaml | 2 +- deploy/utils/setup_k8s_namespace.sh | 4 +- docs/benchmarks/benchmarking.md | 23 ++++- docs/benchmarks/pre_deployment_profiling.md | 31 ++++-- 15 files changed, 209 insertions(+), 72 deletions(-) create mode 100644 benchmarks/profiler/utils/__init__.py diff --git a/benchmarks/README.md b/benchmarks/README.md index 3c310ffa4a..42bc317f54 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -61,6 +61,11 @@ The benchmarking framework supports: - Customizable concurrency levels (configurable via CONCURRENCIES env var), sequence lengths, and models - Automated performance plot generation with custom labels +**Sequential GPU Usage:** +- Models are deployed and benchmarked **sequentially**, not in parallel +- Each deployment gets exclusive access to all available GPUs during its benchmark run +- Ensures accurate performance measurements and fair comparison across configurations + **Supported Backends:** - DynamoGraphDeployments - External HTTP endpoints (for comparison with non-Dynamo backends) diff --git a/benchmarks/benchmark.sh b/benchmarks/benchmark.sh index 797545c517..5b86280f09 100755 --- a/benchmarks/benchmark.sh +++ b/benchmarks/benchmark.sh @@ -11,7 +11,7 @@ DYNAMO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" # Configuration - all set via command line arguments NAMESPACE="" -MODEL="deepseek-ai/DeepSeek-R1-Distill-Llama-8B" +MODEL="Qwen/Qwen3-0.6B" ISL=2000 STD=10 OSL=256 @@ -46,7 +46,7 @@ REQUIRED: OPTIONS: -h, --help Show this help message - -m, --model MODEL Model name for GenAI-Perf configuration and logging (default: deepseek-ai/DeepSeek-R1-Distill-Llama-8B) + -m, --model MODEL Model name for GenAI-Perf configuration and logging (default: Qwen/Qwen3-0.6B) NOTE: This must match the model configured in your deployment manifests and the model deployed in any endpoints. -i, --isl LENGTH Input sequence length (default: $ISL) -s, --std STDDEV Input sequence standard deviation (default: $STD) diff --git a/benchmarks/profiler/deploy/profile_sla_job.yaml b/benchmarks/profiler/deploy/profile_sla_job.yaml index 14be68c7b2..f0d39f0bc3 100644 --- a/benchmarks/profiler/deploy/profile_sla_job.yaml +++ b/benchmarks/profiler/deploy/profile_sla_job.yaml @@ -29,9 +29,9 @@ spec: command: ["python", "-m", "benchmarks.profiler.profile_sla"] args: - --config - - /workspace/configs/disagg.yaml + - /data/configs/disagg.yaml - --output-dir - - /workspace/profiling_results + - /data/profiling_results - --namespace - ${NAMESPACE} - --backend @@ -50,15 +50,10 @@ spec: - "20" volumeMounts: - name: output-volume - mountPath: /workspace/profiling_results - - name: configs - mountPath: /workspace/configs + mountPath: /data restartPolicy: Never volumes: - name: output-volume persistentVolumeClaim: claimName: dynamo-pvc - - name: configs - persistentVolumeClaim: - claimName: dynamo-pvc backoffLimit: 0 diff --git a/benchmarks/profiler/utils/__init__.py b/benchmarks/profiler/utils/__init__.py new file mode 100644 index 0000000000..1a8431c3e3 --- /dev/null +++ b/benchmarks/profiler/utils/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 diff --git a/benchmarks/utils/benchmark.py b/benchmarks/utils/benchmark.py index 236757049a..c007f0a211 100755 --- a/benchmarks/utils/benchmark.py +++ b/benchmarks/utils/benchmark.py @@ -54,17 +54,17 @@ def main() -> int: help="Input in format