diff --git a/benchmarks/incluster/README.md b/benchmarks/incluster/README.md new file mode 120000 index 00000000000..2509b53553b --- /dev/null +++ b/benchmarks/incluster/README.md @@ -0,0 +1 @@ +../../docs/benchmarks/benchmarking.md#server-side-benchmarking-in-cluster \ No newline at end of file diff --git a/benchmarks/incluster/benchmark_job.yaml b/benchmarks/incluster/benchmark_job.yaml new file mode 100644 index 00000000000..3bcab9f85da --- /dev/null +++ b/benchmarks/incluster/benchmark_job.yaml @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: batch/v1 +kind: Job +metadata: + name: dynamo-benchmark +spec: + template: + spec: + serviceAccountName: dynamo-sa + imagePullSecrets: + - name: docker-imagepullsecret + securityContext: + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 1000 + containers: + - name: benchmark-runner + image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + resources: + requests: + cpu: "4" + memory: "8Gi" + limits: + cpu: "8" + memory: "16Gi" + env: + - name: HUGGING_FACE_HUB_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: HF_TOKEN + command: ["python3", "-m", "benchmarks.utils.benchmark"] + args: + - --model + - "Qwen/Qwen3-0.6B" + - --isl + - "2000" + - --std + - "10" + - --osl + - "256" + - --output-dir + - /data/results + - --input + - "qwen-vllm-agg=vllm-agg-frontend:8000" + # to benchmark multiple services, repeat the --input / "name=service-url:port" pair above once per additional service, for example: + # - --input + # - "name=service-url:port" + volumeMounts: + - name: data-volume + mountPath: /data + restartPolicy: Never + volumes: + - name: data-volume + persistentVolumeClaim: + claimName: dynamo-pvc + backoffLimit: 0 + ttlSecondsAfterFinished: 3600 # Clean up job after 1 hour diff --git 
a/benchmarks/utils/benchmark.py b/benchmarks/utils/benchmark.py index 944c2d42155..840660c5e51 100755 --- a/benchmarks/utils/benchmark.py +++ b/benchmarks/utils/benchmark.py @@ -7,53 +7,59 @@ import re import sys from typing import Dict, Tuple +from urllib.parse import urlsplit -from benchmarks.utils.workflow import run_benchmark_workflow +from benchmarks.utils.workflow import has_http_scheme, run_benchmark_workflow +from deploy.utils.kubernetes import is_running_in_cluster def validate_inputs(inputs: Dict[str, str]) -> None: - """Validate that all inputs are HTTP endpoints""" + """Validate that all inputs are HTTP endpoints or internal service URLs when running in cluster""" for label, value in inputs.items(): - if not value.lower().startswith(("http://", "https://")): - raise ValueError( - f"Input '{label}' must be an HTTP endpoint (starting with http:// or https://). Got: {value}" - ) + v = value.strip() + if is_running_in_cluster(): + # In cluster, accept an HTTP(S) URL or a schemeless internal service address like host[:port][/path]; the "//" prefix below makes urlsplit treat the value as a netloc. NOTE(review): reading parts.port raises ValueError by itself for a non-numeric or out-of-range port (so that error omits the label), leaving the explicit range check to reject only port 0; confirm this is intended. + if has_http_scheme(v): + pass + else: + parts = urlsplit(f"//{v}") + host_ok = bool(parts.hostname) + port_ok = parts.port is None or (1 <= parts.port <= 65535) + if not (host_ok and port_ok): + raise ValueError( + f"Input '{label}' must be HTTP(S) or internal service URL. Got: {value}" + ) + else: + if not has_http_scheme(v): + raise ValueError(f"Input '{label}' must be HTTP endpoint. Got: {value}") # Validate reserved labels if label.lower() == "plots": - raise ValueError( - "Label 'plots' is reserved and cannot be used. Please choose a different label." - ) + raise ValueError("Label 'plots' is reserved") def parse_input(input_str: str) -> Tuple[str, str]: """Parse input string in format key=value with additional validation""" if "=" not in input_str: - raise ValueError( - f"Invalid input format. Expected: