Merged
Changes from 1 commit (40 commits in this pull request)
68714fd  rename (tedzhouhk, Sep 19, 2025)
1c41d56  stage (tedzhouhk, Sep 19, 2025)
4a73121  Merge branch 'main' of https://github.com/ai-dynamo/dynamo into hzhou… (tedzhouhk, Sep 19, 2025)
db737f6  stage (tedzhouhk, Sep 19, 2025)
2a42aea  add test (tedzhouhk, Sep 20, 2025)
5b88f60  add test (tedzhouhk, Sep 20, 2025)
330a611  delete (tedzhouhk, Sep 20, 2025)
684f7c9  bug fixes (tedzhouhk, Sep 22, 2025)
a895bc2  enable grove (tedzhouhk, Sep 22, 2025)
a6f70a5  bug (tedzhouhk, Sep 22, 2025)
0b64cda  remove isl/osl in planner arg (tedzhouhk, Sep 22, 2025)
4fb5294  update doc and plot script (tedzhouhk, Sep 23, 2025)
18c0fae  pc (tedzhouhk, Sep 23, 2025)
bad3f5c  Merge branch 'main' of https://github.com/ai-dynamo/dynamo into hzhou… (tedzhouhk, Sep 23, 2025)
76e77aa  pc (tedzhouhk, Sep 23, 2025)
88105cb  doclink (tedzhouhk, Sep 23, 2025)
44bc765  Merge branch 'main' of https://github.com/ai-dynamo/dynamo into hzhou… (tedzhouhk, Sep 23, 2025)
23dd336  fix test (tedzhouhk, Sep 23, 2025)
96d2978  fix conc bug (tedzhouhk, Sep 23, 2025)
b016918  fix doc, add todo (tedzhouhk, Sep 23, 2025)
5440e91  Update benchmarks/profiler/profile_sla.py (tedzhouhk, Sep 23, 2025)
77d2d66  Update benchmarks/profiler/profile_sla.py (tedzhouhk, Sep 23, 2025)
fe05ce5  Update benchmarks/profiler/profile_sla.py (tedzhouhk, Sep 23, 2025)
e9c9c71  Update benchmarks/profiler/profile_sla.py (tedzhouhk, Sep 23, 2025)
2f61d82  Update benchmarks/profiler/profile_sla.py (tedzhouhk, Sep 23, 2025)
95c718b  Update benchmarks/profiler/profile_sla.py (tedzhouhk, Sep 23, 2025)
a1ec4dc  Update benchmarks/profiler/utils/plot.py (tedzhouhk, Sep 23, 2025)
da99c64  doc (tedzhouhk, Sep 23, 2025)
56241ae  Merge branch 'hzhou/sglang-dsr1-sweep' of https://github.com/ai-dynam… (tedzhouhk, Sep 23, 2025)
06d9bd6  Update docs/benchmarks/pre_deployment_profiling.md (tedzhouhk, Sep 23, 2025)
b562984  pc (tedzhouhk, Sep 23, 2025)
4f2f905  Update benchmarks/profiler/profile_sla.py (tedzhouhk, Sep 23, 2025)
fb897c1  Update benchmarks/profiler/utils/plot.py (tedzhouhk, Sep 23, 2025)
ff72609  better code (tedzhouhk, Sep 24, 2025)
5f428dd  Merge branch 'hzhou/sglang-dsr1-sweep' of https://github.com/ai-dynam… (tedzhouhk, Sep 24, 2025)
3acb775  pc (tedzhouhk, Sep 24, 2025)
fbac34f  add todo (tedzhouhk, Sep 24, 2025)
aead4d3  mypy (tedzhouhk, Sep 24, 2025)
82595a9  update test (tedzhouhk, Sep 24, 2025)
17d3349  pc (tedzhouhk, Sep 24, 2025)
better code
Signed-off-by: hongkuanz <[email protected]>
tedzhouhk committed Sep 24, 2025
commit ff72609a16ac04266a3e8fd891ba10503bd9dcf9
237 changes: 94 additions & 143 deletions benchmarks/profiler/utils/config.py
@@ -187,6 +187,59 @@ def set_multinode_config(worker_service, gpu_count: int, num_gpus_per_node: int)
     worker_service.multinode.nodeCount = node_count
 
 
+def get_worker_service_from_config(config: dict):
+    """Helper function to get the SGLang decode worker service from config."""
+    cfg = Config.model_validate(config)
+    return cfg.spec.services[WORKER_COMPONENT_NAMES["sglang"].decode_worker_k8s_name]
+
+
+def setup_worker_service_resources(worker_service, gpu_count: int, num_gpus_per_node: int = None):
+    """Helper function to set up worker service resources (requests and limits)."""
+    # Handle multinode configuration if num_gpus_per_node is provided
+    if num_gpus_per_node is not None:
+        set_multinode_config(worker_service, gpu_count, num_gpus_per_node)
+
+    # Ensure resources exists
+    if worker_service.resources is None:
+        worker_service.resources = ServiceResources()
+
+    # Ensure requests exists
+    if worker_service.resources.requests is None:
+        worker_service.resources.requests = {}
+
+    # Set GPU requests
+    gpu_value = min(gpu_count, num_gpus_per_node) if num_gpus_per_node is not None else gpu_count
+    worker_service.resources.requests["gpu"] = str(gpu_value)
+
+    # Update limits if they exist
+    if worker_service.resources.limits is not None:
+        worker_service.resources.limits["gpu"] = str(gpu_value)
+
+
+def validate_and_get_worker_args(worker_service):
+    """Helper function to validate worker service and get its arguments."""
+    if (
+        not worker_service.extraPodSpec
+        or not worker_service.extraPodSpec.mainContainer
+    ):
+        raise ValueError(
+            f"Missing extraPodSpec or mainContainer in SGLang decode worker service '{WORKER_COMPONENT_NAMES['sglang'].decode_worker_k8s_name}'"
+        )
+
+    args = worker_service.extraPodSpec.mainContainer.args
+    return break_arguments(args)
+
+
+def set_argument_value(args: list, arg_name: str, value: str):
+    """Helper function to set an argument value, adding it if not present."""
+    try:
+        idx = args.index(arg_name)
+        args[idx + 1] = value
+    except ValueError:
+        args = append_argument(args, [arg_name, value])
+    return args
+
+
 class ConfigModifierProtocol(Protocol):
     @classmethod
     def convert_config(
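
Note on the refactor: the four helpers above deduplicate logic that was previously repeated across set_config_tp_size, set_config_tep_size, and set_config_dep_size. The core contract is set_argument_value, which operates on a flat token list: overwrite the token after a flag if the flag is present, otherwise append the flag/value pair. A minimal self-contained sketch of that contract follows; append_argument is stubbed here as plain list concatenation and the sample command line is invented, both assumptions for illustration only.

def append_argument(args: list, new) -> list:
    # Stand-in for the repo helper; here it simply concatenates tokens.
    return args + (new if isinstance(new, list) else [new])

def set_argument_value(args: list, arg_name: str, value: str) -> list:
    try:
        idx = args.index(arg_name)
        args[idx + 1] = value  # flag present: overwrite the token after it
    except ValueError:
        args = append_argument(args, [arg_name, value])  # flag absent: append
    return args

args = ["--model-path", "deepseek-ai/DeepSeek-R1", "--tp", "8"]
args = set_argument_value(args, "--tp", "16")       # overwrites in place
args = set_argument_value(args, "--ep-size", "16")  # appends the pair
# args == ["--model-path", "deepseek-ai/DeepSeek-R1", "--tp", "16", "--ep-size", "16"]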
@@ -563,174 +616,72 @@ def convert_config(
     @classmethod
     def set_config_tp_size(cls, config: dict, tp_size: int):
         cfg = Config.model_validate(config)
 
-        worker_service = cfg.spec.services[
-            WORKER_COMPONENT_NAMES["sglang"].decode_worker_k8s_name
-        ]
-
-        # Ensure resources exists
-        if worker_service.resources is None:
-            worker_service.resources = ServiceResources()
-
-        # Ensure requests exists
-        if worker_service.resources.requests is None:
-            worker_service.resources.requests = {}
-
-        worker_service.resources.requests["gpu"] = str(tp_size)
-
-        # Update limits if they exist
-        if worker_service.resources.limits is not None:
-            worker_service.resources.limits["gpu"] = str(tp_size)
-
-        if (
-            not worker_service.extraPodSpec
-            or not worker_service.extraPodSpec.mainContainer
-        ):
-            raise ValueError(
-                f"Missing extraPodSpec or mainContainer in SGLang decode worker service '{WORKER_COMPONENT_NAMES['sglang'].decode_worker_k8s_name}'"
-            )
-        args = worker_service.extraPodSpec.mainContainer.args
-
-        args = break_arguments(args)
-
-        try:
-            idx = args.index("--tp")
-            args[idx + 1] = str(tp_size)
-        except ValueError:
-            args = append_argument(args, ["--tp", str(tp_size)])
-
+        worker_service = get_worker_service_from_config(config)
+
+        # Set up resources
+        setup_worker_service_resources(worker_service, tp_size)
+
+        # Get and validate args
+        args = validate_and_get_worker_args(worker_service)
+
+        # Set --tp argument
+        args = set_argument_value(args, "--tp", str(tp_size))
+
         worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
 
         return cfg.model_dump()
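
For this single-node TP path, setup_worker_service_resources is called without num_gpus_per_node, so the GPU request is simply tp_size; the per-node cap only applies to the TEP/DEP variants below. A standalone sketch of the request rule (not the repo's code path, which goes through the pydantic Config models):

from typing import Optional

def gpu_request(gpu_count: int, num_gpus_per_node: Optional[int] = None) -> str:
    # Mirrors the rule in setup_worker_service_resources: request one node's
    # worth of GPUs per pod when running multinode, else the full count.
    value = min(gpu_count, num_gpus_per_node) if num_gpus_per_node is not None else gpu_count
    return str(value)

assert gpu_request(8) == "8"       # set_config_tp_size with tp_size=8
assert gpu_request(16, 8) == "8"   # TEP/DEP of 16 on 8-GPU nodes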

     @classmethod
     def set_config_tep_size(cls, config: dict, tep_size: int, num_gpus_per_node: int):
         cfg = Config.model_validate(config)
 
-        worker_service = cfg.spec.services[
-            WORKER_COMPONENT_NAMES["sglang"].decode_worker_k8s_name
-        ]
-
-        # Handle multinode configuration
-        set_multinode_config(worker_service, tep_size, num_gpus_per_node)
-
-        # Ensure resources exists
-        if worker_service.resources is None:
-            worker_service.resources = ServiceResources()
-
-        # Ensure requests exists
-        if worker_service.resources.requests is None:
-            worker_service.resources.requests = {}
-
-        worker_service.resources.requests["gpu"] = str(min(tep_size, num_gpus_per_node))
-
-        # Update limits if they exist
-        if worker_service.resources.limits is not None:
-            worker_service.resources.limits["gpu"] = str(
-                min(tep_size, num_gpus_per_node)
-            )
-
-        if (
-            not worker_service.extraPodSpec
-            or not worker_service.extraPodSpec.mainContainer
-        ):
-            raise ValueError(
-                f"Missing extraPodSpec or mainContainer in SGLang decode worker service '{WORKER_COMPONENT_NAMES['sglang'].decode_worker_k8s_name}'"
-            )
-        args = worker_service.extraPodSpec.mainContainer.args
-
-        args = break_arguments(args)
-
+        worker_service = get_worker_service_from_config(config)
+
+        # Set up resources with multinode configuration
+        setup_worker_service_resources(worker_service, tep_size, num_gpus_per_node)
+
+        # Get and validate args
+        args = validate_and_get_worker_args(worker_service)
+
         # 1. Set --tp=tep_size, if not present add it
-        try:
-            idx = args.index("--tp")
-            args[idx + 1] = str(tep_size)
-        except ValueError:
-            args = append_argument(args, ["--tp", str(tep_size)])
+        args = set_argument_value(args, "--tp", str(tep_size))
 
         # 2. Set --ep-size=tep_size, if not present add it
-        try:
-            idx = args.index("--ep-size")
-            args[idx + 1] = str(tep_size)
-        except ValueError:
-            args = append_argument(args, ["--ep-size", str(tep_size)])
+        args = set_argument_value(args, "--ep-size", str(tep_size))
 
         # 3. Remove --dp if present
         args = remove_valued_arguments(args, "--dp")
 
         # 4. Remove --enable-dp-attention if present
         if "--enable-dp-attention" in args:
             args.remove("--enable-dp-attention")
 
         worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
 
         return cfg.model_dump()
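
To make the four TEP steps concrete: a worker previously configured for data parallelism ends up with --tp and --ep-size both equal to tep_size and the DP flags stripped. A hypothetical before/after on the tokenized args (the starting command line is invented for illustration):

# Hypothetical args previously configured for DEP=8:
before = ["--tp", "8", "--dp", "8", "--enable-dp-attention", "--ep-size", "8"]
# After set_config_tep_size(config, tep_size=16, num_gpus_per_node=8):
after = ["--tp", "16", "--ep-size", "16"]  # --dp and --enable-dp-attention removed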

     @classmethod
     def set_config_dep_size(cls, config: dict, dep_size: int, num_gpus_per_node: int):
         cfg = Config.model_validate(config)
 
-        worker_service = cfg.spec.services[
-            WORKER_COMPONENT_NAMES["sglang"].decode_worker_k8s_name
-        ]
-
-        # Handle multinode configuration
-        set_multinode_config(worker_service, dep_size, num_gpus_per_node)
-
-        # Ensure resources exists
-        if worker_service.resources is None:
-            worker_service.resources = ServiceResources()
-
-        # Ensure requests exists
-        if worker_service.resources.requests is None:
-            worker_service.resources.requests = {}
-
-        worker_service.resources.requests["gpu"] = str(min(dep_size, num_gpus_per_node))
-
-        # Update limits if they exist
-        if worker_service.resources.limits is not None:
-            worker_service.resources.limits["gpu"] = str(
-                min(dep_size, num_gpus_per_node)
-            )
-
-        if (
-            not worker_service.extraPodSpec
-            or not worker_service.extraPodSpec.mainContainer
-        ):
-            raise ValueError(
-                f"Missing extraPodSpec or mainContainer in SGLang decode worker service '{WORKER_COMPONENT_NAMES['sglang'].decode_worker_k8s_name}'"
-            )
-        args = worker_service.extraPodSpec.mainContainer.args
-
-        args = break_arguments(args)
-
-        # 1. Set --tp=dep size
-        try:
-            idx = args.index("--tp")
-            args[idx + 1] = str(dep_size)
-        except ValueError:
-            args = append_argument(args, ["--tp", str(dep_size)])
-
+        worker_service = get_worker_service_from_config(config)
+
+        # Set up resources with multinode configuration
+        setup_worker_service_resources(worker_service, dep_size, num_gpus_per_node)
+
+        # Get and validate args
+        args = validate_and_get_worker_args(worker_service)
+
+        # 1. Set --tp=dep_size
+        args = set_argument_value(args, "--tp", str(dep_size))
 
         # 2. Set --dp=dep_size (data parallelism across experts)
-        try:
-            idx = args.index("--dp")
-            args[idx + 1] = str(dep_size)
-        except ValueError:
-            args = append_argument(args, ["--dp", str(dep_size)])
+        args = set_argument_value(args, "--dp", str(dep_size))
 
         # 3. Enable --enable-dp-attention
         if "--enable-dp-attention" not in args:
             args = append_argument(args, "--enable-dp-attention")
 
         # 4. Set --ep-size=dep_size (expert parallelism size)
-        try:
-            idx = args.index("--ep-size")
-            args[idx + 1] = str(dep_size)
-        except ValueError:
-            args = append_argument(args, ["--ep-size", str(dep_size)])
+        args = set_argument_value(args, "--ep-size", str(dep_size))
 
         worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
 
         return cfg.model_dump()

     @classmethod
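
For comparison with TEP, the DEP variant drives --tp, --dp, and --ep-size to the same dep_size and turns --enable-dp-attention on; set_multinode_config (outside this hunk) then sizes the deployment across nodes. The sketch below assumes the node count is ceil(dep_size / num_gpus_per_node), which is the natural reading of the min(dep_size, num_gpus_per_node) per-pod request, but the actual formula lives in set_multinode_config and is not shown here:

import math

def dep_layout(dep_size: int, num_gpus_per_node: int):
    # Flags as set by set_config_dep_size.
    flags = ["--tp", str(dep_size), "--dp", str(dep_size),
             "--enable-dp-attention", "--ep-size", str(dep_size)]
    # Assumed node-count rule; verify against set_multinode_config.
    node_count = math.ceil(dep_size / num_gpus_per_node)
    return flags, node_count

flags, nodes = dep_layout(16, 8)
print(nodes)  # 2: two 8-GPU nodes host the dep_size=16 worker, assuming the ceil rule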