Merged

Changes from all commits (25 commits)
b00cc28
add sub component type
tmonty12 Sep 19, 2025
433c71e
add testing
tmonty12 Sep 19, 2025
11f9e59
planner using prometheus env var endpoint
tmonty12 Sep 22, 2025
8eb0665
planner uses subComponentType and testingt
tmonty12 Sep 23, 2025
ea84c64
use deployment validation error
tmonty12 Sep 23, 2025
0a71b68
add back copyright header
tmonty12 Sep 23, 2025
3ec215d
backwards compatibility with framework component name
tmonty12 Sep 23, 2025
411c02f
update pre deployment profiling to use subComponentType
tmonty12 Sep 24, 2025
c59dd08
small comments
tmonty12 Sep 24, 2025
8e5dfda
update planner manifest to remove prometheus svc and use subComponent…
tmonty12 Sep 24, 2025
69bc1a9
update sla planner deployment docs
tmonty12 Sep 24, 2025
7dc6696
fix doc link
tmonty12 Sep 24, 2025
752b4ac
update profiler config and fix ci
tmonty12 Sep 24, 2025
0761925
small fixes
tmonty12 Sep 24, 2025
51782ff
more small fixes
tmonty12 Sep 24, 2025
3cc6677
revert changes to profiler - will do so in follow on PR
tmonty12 Sep 24, 2025
88b4181
args not optional
tmonty12 Sep 24, 2025
4da6983
small docs update
tmonty12 Sep 24, 2025
20504a2
properly parse prometheus metrics
tmonty12 Sep 26, 2025
3661f5d
fix ci
tmonty12 Sep 26, 2025
74e054c
fix virtual_connector
tmonty12 Sep 26, 2025
823327d
fix mypy
tmonty12 Sep 26, 2025
2c85f2d
remove prometheus server
tmonty12 Sep 26, 2025
0b54bca
pc
tedzhouhk Sep 26, 2025
15d524e
add subComponentType, remove prometheus installation, remove service …
tmonty12 Sep 28, 2025
475 changes: 257 additions & 218 deletions benchmarks/profiler/utils/config.py

Large diffs are not rendered by default.

39 changes: 2 additions & 37 deletions components/backends/sglang/deploy/disagg_planner.yaml
@@ -7,8 +7,6 @@ metadata:
name: sglang-disagg-planner
spec:
envs:
- name: DYNAMO_SERVICE_CONFIG
value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["sglang-disagg-planner-frontend:8000"]}]}]}}'
- name: DYNAMO_NAMESPACE
value: "dynamo"
services:
@@ -61,45 +59,11 @@ spec:
--backend=sglang
--adjustment-interval=60
--profile-results-dir=/data/profiling_results
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
dynamoNamespace: dynamo
componentType: frontend
replicas: 1
envs:
- name: PYTHONPATH
value: "/workspace/components/planner/src"
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
readinessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
initialDelaySeconds: 30
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.planner.prometheus"
decode:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
replicas: 2
resources:
limits:
@@ -131,6 +95,7 @@
dynamoNamespace: dynamo
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
replicas: 2
resources:
limits:
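With the bundled Prometheus service dropped from this manifest (and likewise from the trtllm and vllm manifests below), the planner is expected to reach an already-running Prometheus server, which the defaults.py change further down lets you point at via a PROMETHEUS_ENDPOINT environment variable. A minimal sketch of consuming such an endpoint over Prometheus's standard HTTP API; the endpoint default and the metric selector are illustrative assumptions, not values from this PR:

```python
import os

import requests

# Assumed external Prometheus server; in-cluster this would normally be a
# service DNS name supplied through PROMETHEUS_ENDPOINT.
ENDPOINT = os.environ.get("PROMETHEUS_ENDPOINT", "http://localhost:9090")


def instant_query(promql: str) -> list:
    """Run an instant query against the standard /api/v1/query endpoint."""
    resp = requests.get(
        f"{ENDPOINT}/api/v1/query", params={"query": promql}, timeout=10
    )
    resp.raise_for_status()
    body = resp.json()
    if body.get("status") != "success":
        raise RuntimeError(f"Prometheus query failed: {body}")
    return body["data"]["result"]


# Hypothetical metric selector, for illustration only.
for sample in instant_query('up{job="frontend"}'):
    print(sample["metric"], sample["value"])
```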
44 changes: 2 additions & 42 deletions components/backends/trtllm/deploy/disagg_planner.yaml
@@ -7,8 +7,6 @@ metadata:
name: trtllm-disagg-planner
spec:
envs:
- name: DYNAMO_SERVICE_CONFIG
value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:8000"]}]},{"job_name":"frontend","static_configs":[{"targets":["trtllm-disagg-planner-frontend:8000"]}]}]}}'
- name: DYNAMO_NAMESPACE
value: "trtllm-disagg-planner"
services:
@@ -41,9 +39,6 @@ spec:
envFromSecret: hf-token-secret
componentType: planner
replicas: 1
envs:
- name: PROMETHEUS_PORT
value: "8000"
livenessProbe:
exec:
command:
@@ -84,47 +79,11 @@
- --adjustment-interval=60
- --profile-results-dir=/data/profiling_results
- --prometheus-port=9085
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
dynamoNamespace: trtllm-disagg-planner
componentType: frontend
replicas: 1
envs:
- name: PYTHONPATH
value: "/workspace/components/planner/src"
- name: PROMETHEUS_PORT
value: "8000"
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
readinessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
initialDelaySeconds: 30
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- python3
args:
- -m
- dynamo.planner.prometheus
TRTLLMDecodeWorker:
dynamoNamespace: trtllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
replicas: 1
livenessProbe:
httpGet:
@@ -173,6 +132,7 @@
dynamoNamespace: trtllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
replicas: 1
resources:
limits:
41 changes: 2 additions & 39 deletions components/backends/vllm/deploy/disagg_planner.yaml
@@ -7,12 +7,8 @@ metadata:
name: vllm-disagg-planner
spec:
envs:
- name: DYNAMO_SERVICE_CONFIG
value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["vllm-disagg-planner-frontend:8000"]}]}]}}'
- name: DYNAMO_NAMESPACE
value: "vllm-disagg-planner"
- name: PROMETHEUS_PORT
value: "8000"
services:
Frontend:
dynamoNamespace: vllm-disagg-planner
@@ -63,45 +59,11 @@ spec:
--backend=vllm
--adjustment-interval=60
--profile-results-dir=/data/profiling_results
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
dynamoNamespace: vllm-disagg-planner
componentType: frontend
replicas: 1
envs:
- name: PYTHONPATH
value: "/workspace/components/planner/src"
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
readinessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
initialDelaySeconds: 30
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.planner.prometheus"
VllmDecodeWorker:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
replicas: 2
resources:
limits:
@@ -127,6 +89,7 @@
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
replicas: 2
resources:
limits:
10 changes: 8 additions & 2 deletions components/planner/src/dynamo/planner/__init__.py
@@ -8,11 +8,17 @@
"LoadPlannerDefaults",
"SLAPlannerDefaults",
"ServiceConfig",
"TargetReplica",
"SubComponentType",
]
# Import the classes
from dynamo.planner.config import ServiceConfig
from dynamo.planner.defaults import LoadPlannerDefaults, SLAPlannerDefaults
from dynamo.planner.kubernetes_connector import KubernetesConnector
from dynamo.planner.defaults import (
LoadPlannerDefaults,
SLAPlannerDefaults,
SubComponentType,
)
from dynamo.planner.kubernetes_connector import KubernetesConnector, TargetReplica
from dynamo.planner.planner_connector import PlannerConnector
from dynamo.planner.virtual_connector import VirtualConnector

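The widened re-export list makes the new names importable from the package root; a tiny sketch, assuming dynamo.planner is installed:

```python
from dynamo.planner import KubernetesConnector, SubComponentType, TargetReplica

# SubComponentType mixes in str, so members compare equal to their raw values.
assert SubComponentType.PREFILL == "prefill"
assert SubComponentType.DECODE.value == "decode"
```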
76 changes: 76 additions & 0 deletions components/planner/src/dynamo/planner/defaults.py
@@ -15,8 +15,16 @@

import logging
import os
from enum import Enum
from typing import Optional

from pydantic import BaseModel

from dynamo.planner.kube import get_current_k8s_namespace
from dynamo.planner.utils.exceptions import (
DuplicateSubComponentError,
SubComponentNotFoundError,
)
from dynamo.runtime.logging import configure_dynamo_logging

configure_dynamo_logging()
@@ -56,6 +64,10 @@ class LoadPlannerDefaults(BasePlannerDefaults):

def _get_default_prometheus_endpoint(port: str, namespace: str):
"""Compute default prometheus endpoint using environment variables and Kubernetes service discovery"""
prometheus_endpoint = os.environ.get("PROMETHEUS_ENDPOINT", "").strip()
if prometheus_endpoint:
logger.debug("Using PROMETHEUS_ENDPOINT override: %s", prometheus_endpoint)
return prometheus_endpoint

k8s_namespace = get_current_k8s_namespace()
if k8s_namespace and k8s_namespace != "default":
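The hunk above makes an explicit PROMETHEUS_ENDPOINT take precedence over service discovery. A hedged sketch of the overall resolution order; the discovery and fallback branches are inferred from the truncated context here, not copied from the elided code:

```python
import os
from typing import Optional


def resolve_prometheus_endpoint(port: str, k8s_namespace: Optional[str]) -> str:
    """Sketch of the resolution order: explicit override, then discovery."""
    # 1. An explicit PROMETHEUS_ENDPOINT always wins (matches the hunk above).
    override = os.environ.get("PROMETHEUS_ENDPOINT", "").strip()
    if override:
        return override
    # 2. Otherwise fall back to in-cluster service discovery; this URL shape
    #    is an assumption, not the exact string built by the elided code.
    if k8s_namespace and k8s_namespace != "default":
        return f"http://prometheus-server.{k8s_namespace}.svc.cluster.local:{port}"
    # 3. Last-resort local default (also an assumption).
    return f"http://localhost:{port}"
```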
@@ -124,3 +136,67 @@ class TrtllmComponentName:
"sglang": SGLangComponentName,
"trtllm": TrtllmComponentName,
}


class SubComponentType(str, Enum):
PREFILL = "prefill"
DECODE = "decode"


class Service(BaseModel):
name: str
service: dict

def number_replicas(self) -> int:
return self.service.get("replicas", 0)


# TODO: still supporting framework component names for backwards compatibility
# Should be deprecated in favor of service subComponentType
def get_service_from_sub_component_type_or_name(
deployment: dict,
sub_component_type: SubComponentType,
component_name: Optional[str] = None,
) -> Service:
"""
    Get the service entry for a component in a graph deployment, matching on
    subComponentType with an optional fallback to the legacy component name

Returns: Service object

Raises:
SubComponentNotFoundError: If no service with the specified subComponentType is found
DuplicateSubComponentError: If multiple services with the same subComponentType are found
"""
services = deployment.get("spec", {}).get("services", {})

# Collect all available subComponentTypes for better error messages
available_types = []
matching_services = []

for curr_name, curr_service in services.items():
service_sub_type = curr_service.get("subComponentType", "")
if service_sub_type:
available_types.append(service_sub_type)

if service_sub_type == sub_component_type.value:
matching_services.append((curr_name, curr_service))

# Check for duplicates
if len(matching_services) > 1:
service_names = [name for name, _ in matching_services]
raise DuplicateSubComponentError(sub_component_type.value, service_names)

    # If no service found with subComponentType and fallback component_name is not provided or not found,
    # or if the fallback component has a non-empty subComponentType, raise error
if not matching_services and (
not component_name
or component_name not in services
or services[component_name].get("subComponentType", "") != ""
):
raise SubComponentNotFoundError(sub_component_type.value)
# If fallback component_name is provided and exists within services, add to matching_services
elif not matching_services and component_name in services:
matching_services.append((component_name, services[component_name]))

name, service = matching_services[0]
return Service(name=name, service=service)
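A usage sketch for the new helper, with a pared-down deployment dict shaped like the worker services in the manifests above (the service names follow the vllm example; the dict itself is illustrative):

```python
from dynamo.planner.defaults import (
    SubComponentType,
    get_service_from_sub_component_type_or_name,
)

deployment = {
    "spec": {
        "services": {
            "VllmDecodeWorker": {
                "componentType": "worker",
                "subComponentType": "decode",
                "replicas": 2,
            },
            "VllmPrefillWorker": {
                "componentType": "worker",
                "subComponentType": "prefill",
                "replicas": 2,
            },
        }
    }
}

# Matched by subComponentType; component_name only matters as a legacy
# fallback for services that predate the field.
svc = get_service_from_sub_component_type_or_name(
    deployment, SubComponentType.DECODE, component_name="VllmDecodeWorker"
)
assert (svc.name, svc.number_replicas()) == ("VllmDecodeWorker", 2)
```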