Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
4e1c05e
[trainer, cfg] feat: Add AlgorithmConfig dataclass for type-safe algo…
openhands-agent Jun 20, 2025
9ed10fa
Complete algorithm config dataclass migration
openhands-agent Jun 21, 2025
646a1e7
Remove temporary test files
openhands-agent Jun 21, 2025
d7aa12b
Revert changes and rename algorithm config to algo config
openhands-agent Jun 21, 2025
109340d
Update compute_advantage type annotations and fix linting
openhands-agent Jun 21, 2025
89e4b34
Update all core_algos.py functions to use AlgoConfig type annotations
openhands-agent Jun 21, 2025
f0f406f
Fix compute_grpo_outcome_advantage function signature to include Algo…
openhands-agent Jun 21, 2025
637a358
Merge main into feat/algorithm-config-dataclass
openhands-agent Jun 22, 2025
9eeab2e
init frozen adaptor
eric-haibin-lin Jun 29, 2025
1b85290
move to profiler folder
eric-haibin-lin Jun 30, 2025
ba93223
backward compat namespace move
eric-haibin-lin Jun 30, 2025
da8d771
fix lint
eric-haibin-lin Jun 30, 2025
0b1cb62
remove omega_conf_to_dataclass type
eric-haibin-lin Jun 30, 2025
2c25c76
Refactor algorithm config classes to use frozen dataclasses and BaseC…
devin-ai-integration[bot] Jun 30, 2025
520b23d
Revert documentation changes and fix omega_conf_to_dataclass call
devin-ai-integration[bot] Jun 30, 2025
80685b4
Fix config.get() call in compute_advantage function
devin-ai-integration[bot] Jun 30, 2025
2df1773
Merge main branch and resolve conflicts
devin-ai-integration[bot] Jun 30, 2025
52c62b3
Fix lint issues after merge
devin-ai-integration[bot] Jun 30, 2025
562a111
Fix type annotation and docstring coverage issues
devin-ai-integration[bot] Jun 30, 2025
81d7edf
Add test_base_config_on_cpu.py to allow list and update omega_conf_to…
devin-ai-integration[bot] Jun 30, 2025
a6df414
fix test
eric-haibin-lin Jun 30, 2025
6e743a5
fix litn
eric-haibin-lin Jun 30, 2025
ffa8d77
convert to dataclass upfront
eric-haibin-lin Jun 30, 2025
12c22b8
Merge branch 'feat/algorithm-config-dataclass' of code.byted.org:data…
eric-haibin-lin Jun 30, 2025
e2fac2c
update import stmt
eric-haibin-lin Jun 30, 2025
969a734
merge with main
eric-haibin-lin Jun 30, 2025
69a1a17
fix lint
eric-haibin-lin Jun 30, 2025
f1f4047
add _target_ to megatron config
eric-haibin-lin Jun 30, 2025
7bcd0fe
fix ranks init
eric-haibin-lin Jun 30, 2025
0eacb9f
adjust line-len
eric-haibin-lin Jul 1, 2025
ac19891
adjust len=120
eric-haibin-lin Jul 1, 2025
c907607
merge with main
eric-haibin-lin Jul 1, 2025
e63bbb0
fix lint
eric-haibin-lin Jul 1, 2025
8bce67d
merge with master
eric-haibin-lin Jul 3, 2025
fb93f20
merge with main
eric-haibin-lin Jul 4, 2025
c195f00
Merge remote-tracking branch 'oss/main' into feat/algorithm-config-da…
eric-haibin-lin Jul 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/api/utils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,6 @@ FSDP Utilities
Debug Utilities
-------------------

.. automodule:: verl.utils.debug
.. automodule:: verl.utils.profiler
:members: log_gpu_memory_usage, GPUMemoryLogger

1 change: 1 addition & 0 deletions recipe/dapo/config/dapo_trainer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ reward_model:

algorithm:
filter_groups:
_target_: verl.trainer.config.FilterGroupsConfig
enable: False # We try to avoid forgetting to set enable
metric: null # acc / score / seq_reward / seq_final_reward / ...
max_num_gen_batches: 0 # Non-positive values mean no upper limit
Expand Down
2 changes: 1 addition & 1 deletion recipe/dapo/dapo_ray_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
compute_advantage,
compute_response_mask,
)
from verl.utils.debug import marked_timer
from verl.utils.profiler import marked_timer


class RayDAPOTrainer(RayPPOTrainer):
Expand Down
28 changes: 26 additions & 2 deletions recipe/dapo/main_dapo.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,44 @@
Note that we don't combine the main with ray_trainer as ray_trainer is used by other main.
"""

import copy
import os
import socket

import hydra
import ray
from omegaconf import OmegaConf
from omegaconf import DictConfig, OmegaConf

from verl.trainer.ppo.reward import get_custom_reward_fn
from verl.utils import omega_conf_to_dataclass

from .dapo_ray_trainer import RayDAPOTrainer


def trainer_dict_to_dataclass(conf: DictConfig):
    """Return a deep copy of ``conf`` with selected sections converted to dataclasses.

    The input object is left untouched: it is deep-copied first and every
    conversion is applied to the copy via ``omega_conf_to_dataclass``.

    Args:
        conf (DictConfig): Trainer configuration. The code reads an
            ``algorithm`` section plus a ``profiler`` section under
            ``critic``, ``reward_model``, ``actor_rollout_ref.actor``,
            ``actor_rollout_ref.ref`` and ``actor_rollout_ref.rollout``.

    Returns:
        DictConfig: The deep copy of ``conf`` after the sections listed above
        have been reassigned with their converted values.
    """
    converted = copy.deepcopy(conf)
    converted.algorithm = omega_conf_to_dataclass(converted.algorithm)

    # Attribute paths to the owners of each ``profiler`` section, listed in
    # the order in which the sections are converted.
    profiler_owner_paths = (
        ("critic",),
        ("reward_model",),
        ("actor_rollout_ref", "actor"),
        ("actor_rollout_ref", "ref"),
        ("actor_rollout_ref", "rollout"),
    )
    for path in profiler_owner_paths:
        owner = converted
        for key in path:
            owner = getattr(owner, key)
        owner.profiler = omega_conf_to_dataclass(owner.profiler)

    return converted


@hydra.main(config_path="config", config_name="dapo_trainer", version_base=None)
def main(config):
def main(config_dict):
config = trainer_dict_to_dataclass(config_dict)
run_ppo(config)


Expand Down
2 changes: 1 addition & 1 deletion recipe/entropy/entropy_ray_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
compute_advantage,
compute_response_mask,
)
from verl.utils.debug import simple_timer
from verl.utils.profiler import simple_timer


class RayEntropyTrainer(RayPPOTrainer):
Expand Down
2 changes: 1 addition & 1 deletion recipe/prime/prime_fsdp_workers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from verl.single_controller.base.decorator import Dispatch, register
from verl.utils import hf_tokenizer
from verl.utils.checkpoint.fsdp_checkpoint_manager import FSDPCheckpointManager
from verl.utils.debug import log_gpu_memory_usage
from verl.utils.device import get_device_id, get_device_name, get_nccl_backend
from verl.utils.flops_counter import FlopsCounter
from verl.utils.fs import copy_local_path_from_hdfs
Expand All @@ -39,6 +38,7 @@
offload_fsdp_optimizer,
)
from verl.utils.import_utils import import_external_libs
from verl.utils.profiler import log_gpu_memory_usage
from verl.workers.fsdp_workers import create_device_mesh, get_sharding_strategy
from verl.workers.sharding_manager.fsdp_ulysses import FSDPUlyssesShardingManager

Expand Down
2 changes: 1 addition & 1 deletion recipe/prime/prime_ray_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
from verl.trainer.ppo.ray_trainer import RayPPOTrainer, ResourcePoolManager, Role, WorkerType
from verl.utils.checkpoint.checkpoint_manager import find_latest_ckpt_path
from verl.utils.dataset.rl_dataset import RLHFDataset, collate_fn
from verl.utils.debug.performance import simple_timer
from verl.utils.metric import reduce_metrics
from verl.utils.profiler.performance import simple_timer

from . import prime_core_algos

Expand Down
2 changes: 1 addition & 1 deletion recipe/spin/fsdp_workers.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
from verl.single_controller.base.decorator import Dispatch, register
from verl.utils import hf_tokenizer
from verl.utils.checkpoint.fsdp_checkpoint_manager import FSDPCheckpointManager
from verl.utils.debug import log_gpu_memory_usage
from verl.utils.device import get_device_id, get_device_name, get_nccl_backend, get_torch_device
from verl.utils.flops_counter import FlopsCounter
from verl.utils.fs import copy_to_local
Expand All @@ -46,6 +45,7 @@
)
from verl.utils.import_utils import import_external_libs
from verl.utils.model import compute_position_id_with_mask
from verl.utils.profiler import log_gpu_memory_usage
from verl.workers.fsdp_workers import ActorRolloutRefWorker
from verl.workers.sharding_manager.fsdp_ulysses import FSDPUlyssesShardingManager

Expand Down
2 changes: 1 addition & 1 deletion recipe/sppo/dp_actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
import verl.utils.torch_functional as verl_F
from verl import DataProto
from verl.trainer.ppo.core_algos import agg_loss, kl_penalty
from verl.utils.debug import GPUMemoryLogger
from verl.utils.device import get_device_id
from verl.utils.profiler import GPUMemoryLogger
from verl.utils.py_functional import append_to_dict
from verl.utils.seqlen_balancing import rearrange_micro_batches
from verl.workers.actor.dp_actor import DataParallelPPOActor
Expand Down
2 changes: 1 addition & 1 deletion recipe/sppo/sppo_ray_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
compute_response_mask,
)
from verl.trainer.ppo.reward import compute_reward, compute_reward_async
from verl.utils.debug.performance import simple_timer
from verl.utils.profiler.performance import simple_timer
from verl.utils.tracking import ValidationGenerationsLogger


Expand Down
2 changes: 1 addition & 1 deletion recipe/sppo/sppo_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@

from verl.single_controller.base.decorator import Dispatch, register
from verl.utils.checkpoint.fsdp_checkpoint_manager import FSDPCheckpointManager
from verl.utils.debug import log_gpu_memory_usage
from verl.utils.flops_counter import FlopsCounter
from verl.utils.fsdp_utils import offload_fsdp_model_to_cpu, offload_fsdp_optimizer
from verl.utils.import_utils import import_external_libs
from verl.utils.profiler import log_gpu_memory_usage
from verl.workers.fsdp_workers import ActorRolloutRefWorker

logger = logging.getLogger(__file__)
Expand Down
2 changes: 1 addition & 1 deletion tests/special_distributed/test_tensor_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ def test_all_gather_data_proto():
def test_vocab_parallel_entropy():
from megatron.core import parallel_state as mpu

from verl.utils.debug import log_gpu_memory_usage
from verl.utils.megatron.tensor_parallel import vocab_parallel_entropy
from verl.utils.profiler import log_gpu_memory_usage
from verl.utils.torch_functional import entropy_from_logits

mpu.initialize_model_parallel(
Expand Down
14 changes: 7 additions & 7 deletions tests/special_sanity/check_api_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@
_ALLOW_LIST = [
"verl.third_party.vllm.LLM",
"verl.third_party.vllm.parallel_state",
"verl.utils.debug.WorkerProfiler",
"verl.utils.debug.WorkerProfilerExtension",
"verl.utils.debug.log_gpu_memory_usage",
"verl.utils.debug.log_print",
"verl.utils.debug.mark_annotate",
"verl.utils.debug.mark_end_range",
"verl.utils.debug.mark_start_range",
"verl.utils.profiler.WorkerProfiler",
"verl.utils.profiler.WorkerProfilerExtension",
"verl.utils.profiler.log_gpu_memory_usage",
"verl.utils.profiler.log_print",
"verl.utils.profiler.mark_annotate",
"verl.utils.profiler.mark_end_range",
"verl.utils.profiler.mark_start_range",
"verl.models.mcore.qwen2_5_vl.get_vision_model_config",
"verl.models.mcore.qwen2_5_vl.get_vision_projection_config",
]
Expand Down
2 changes: 1 addition & 1 deletion tests/special_sanity/check_device_api_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"recipe/prime/prime_ray_trainer.py", # appear in default device_name
"recipe/spin/spin_trainer.py", # appear in default device_name
"recipe/sppo/sppo_ray_trainer.py", # appear in default device_name
"verl/utils/debug/nvtx_profile.py", # appear in NsightSystemsProfiler
"verl/utils/profiler/nvtx_profile.py", # appear in NsightSystemsProfiler
"verl/utils/kernel/linear_cross_entropy.py", # appear in nvidia nvtx
"verl/utils/rendezvous/ray_backend.py", # appear in cupy importance
"verl/single_controller/ray/base.py", # appear in default device_name
Expand Down
2 changes: 1 addition & 1 deletion tests/special_sanity/validate_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def main() -> None:
parser.add_argument(
"--allow-files",
nargs="*",
default=["tests/test_protocol_on_cpu.py"],
default=["tests/test_protocol_on_cpu.py", "tests/test_base_config_on_cpu.py"],
help="Extra top-level test folders that are exempt from the rule",
)
args = parser.parse_args()
Expand Down
42 changes: 42 additions & 0 deletions tests/test_base_config_on_cpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest

from verl.base_config import BaseConfig


@pytest.fixture
def base_config_mock():
    """Provide a ``BaseConfig`` instance whose ``test_attr`` is ``"test_value"``."""
    config_under_test = BaseConfig()
    # Attach an ad-hoc attribute so the ``__getitem__`` tests have a known key.
    config_under_test.test_attr = "test_value"
    return config_under_test


def test_getitem_success(base_config_mock):
    """Subscripting with the name of an existing attribute yields its value."""
    looked_up = base_config_mock["test_attr"]
    assert looked_up == "test_value"


def test_getitem_nonexistent_attribute(base_config_mock):
    """Subscripting with an unknown attribute name is expected to raise ``AttributeError``."""
    missing_key = "nonexistent_attr"
    with pytest.raises(AttributeError):
        _ = base_config_mock[missing_key]


def test_getitem_invalid_key_type(base_config_mock):
    """Subscripting with a non-string key is expected to raise ``TypeError``."""
    non_string_key = 123
    with pytest.raises(TypeError):
        _ = base_config_mock[non_string_key]  # type: ignore
13 changes: 13 additions & 0 deletions tests/trainer/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading