diff --git a/benchmarks/README.md b/benchmarks/README.md index 3c310ffa4a..42bc317f54 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -61,6 +61,11 @@ The benchmarking framework supports: - Customizable concurrency levels (configurable via CONCURRENCIES env var), sequence lengths, and models - Automated performance plot generation with custom labels +**Sequential GPU Usage:** +- Models are deployed and benchmarked **sequentially**, not in parallel +- Each deployment gets exclusive access to all available GPUs during its benchmark run +- Ensures accurate performance measurements and fair comparison across configurations + **Supported Backends:** - DynamoGraphDeployments - External HTTP endpoints (for comparison with non-Dynamo backends) diff --git a/benchmarks/benchmark.sh b/benchmarks/benchmark.sh index 797545c517..5b86280f09 100755 --- a/benchmarks/benchmark.sh +++ b/benchmarks/benchmark.sh @@ -11,7 +11,7 @@ DYNAMO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" # Configuration - all set via command line arguments NAMESPACE="" -MODEL="deepseek-ai/DeepSeek-R1-Distill-Llama-8B" +MODEL="Qwen/Qwen3-0.6B" ISL=2000 STD=10 OSL=256 @@ -46,7 +46,7 @@ REQUIRED: OPTIONS: -h, --help Show this help message - -m, --model MODEL Model name for GenAI-Perf configuration and logging (default: deepseek-ai/DeepSeek-R1-Distill-Llama-8B) + -m, --model MODEL Model name for GenAI-Perf configuration and logging (default: Qwen/Qwen3-0.6B) NOTE: This must match the model configured in your deployment manifests and the model deployed in any endpoints. -i, --isl LENGTH Input sequence length (default: $ISL) -s, --std STDDEV Input sequence standard deviation (default: $STD) diff --git a/benchmarks/profiler/deploy/profile_sla_job.yaml b/benchmarks/profiler/deploy/profile_sla_job.yaml index 14be68c7b2..f0d39f0bc3 100644 --- a/benchmarks/profiler/deploy/profile_sla_job.yaml +++ b/benchmarks/profiler/deploy/profile_sla_job.yaml @@ -29,9 +29,9 @@ spec: command: ["python", "-m", "benchmarks.profiler.profile_sla"] args: - --config - - /workspace/configs/disagg.yaml + - /data/configs/disagg.yaml - --output-dir - - /workspace/profiling_results + - /data/profiling_results - --namespace - ${NAMESPACE} - --backend @@ -50,15 +50,10 @@ spec: - "20" volumeMounts: - name: output-volume - mountPath: /workspace/profiling_results - - name: configs - mountPath: /workspace/configs + mountPath: /data restartPolicy: Never volumes: - name: output-volume persistentVolumeClaim: claimName: dynamo-pvc - - name: configs - persistentVolumeClaim: - claimName: dynamo-pvc backoffLimit: 0 diff --git a/benchmarks/profiler/utils/__init__.py b/benchmarks/profiler/utils/__init__.py new file mode 100644 index 0000000000..1a8431c3e3 --- /dev/null +++ b/benchmarks/profiler/utils/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 diff --git a/benchmarks/utils/benchmark.py b/benchmarks/utils/benchmark.py index 236757049a..c007f0a211 100755 --- a/benchmarks/utils/benchmark.py +++ b/benchmarks/utils/benchmark.py @@ -54,17 +54,17 @@ def main() -> int: help="Input in format