fix error and add CI
ETOgaosion committed May 27, 2025
commit 5a8612df0014752cd666f4f72d52a2a67b9648c0
.github/workflows/e2e_ppo_trainer_megatron.yml (6 changes: 5 additions & 1 deletion)
@@ -112,6 +112,11 @@ jobs:
run: |
ray stop --force
RESUME_MODE=auto MODEL_ID=Qwen/Qwen3-0.6B bash tests/e2e/run_ppo_trainer_megatron.sh
- name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3), testing the learning rate scheduler
run: |
ray stop --force
LR_WARMUP_STEPS=1 TOTAL_TRAIN_STEPS=2 MODEL_ID=Qwen/Qwen3-0.6B bash tests/e2e/run_ppo_trainer_megatron.sh

- name: Test Megatron checkpoints merging function (Qwen3 Actor and Critic)
run: |
exp_name="qwen3-0.6b-megatron-gsm8k-minimal"
@@ -230,4 +235,3 @@ jobs:
- name: clean up
run: |
rm -rf checkpoints

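The new step pins LR_WARMUP_STEPS=1 and TOTAL_TRAIN_STEPS=2, so the warmup phase and the decay phase each cover a single optimizer step. Below is a rough sketch of the schedule shape this exercises; it is illustrative only (linear decay, min_lr of 0, and the exact step accounting are assumptions, not Megatron's OptimizerParamScheduler).

```python
# Illustrative only: the schedule shape the CI settings above should exercise,
# assuming linear warmup from lr_warmup_init to lr and linear decay to min_lr = 0.
def sketched_lr(step, lr=1e-6, warmup_steps=1, total_steps=2, warmup_init=0.0, min_lr=0.0):
    if step < warmup_steps:  # warmup phase
        return warmup_init + (lr - warmup_init) * (step + 1) / warmup_steps
    frac = (step + 1 - warmup_steps) / max(total_steps - warmup_steps, 1)
    return lr - (lr - min_lr) * frac  # linear decay phase

for s in range(2):
    print(f"step {s}: lr = {sketched_lr(s):.1e}")  # step 0: 1.0e-06, step 1: 0.0e+00
```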
tests/e2e/run_ppo_trainer_megatron.sh (5 changes: 4 additions & 1 deletion)
@@ -69,6 +69,8 @@ CRITIC_GRAD_OFFLOAD=${CRITIC_GRAD_OFFLOAD:-$COMMON_GRAD_OFFLOAD}
CRITIC_OPTIMIZER_OFFLOAD=${CRITIC_OPTIMIZER_OFFLOAD:-$COMMON_OPTIMIZER_OFFLOAD}
RM_PARAM_OFFLOAD=${RM_PARAM_OFFLOAD:-$COMMON_PARAM_OFFLOAD}

LR_WARMUP_STEPS=${LR_WARMUP_STEPS:-null}

CHECKPOINT_CONTENTS=['model','hf_model','optimizer','extra']
SKIP_SAVE_HF_MODEL=${SKIP_SAVE_HF_MODEL:-0}
if [ $SKIP_SAVE_HF_MODEL -eq 1 ]; then
@@ -91,7 +93,7 @@ for ENGINE in "${ENGINES[@]}"; do
data.filter_overlong_prompts=True \
data.truncation='error' \
actor_rollout_ref.model.path="${MODEL_PATH}" \
actor_rollout_ref.actor.optim.lr=1e-6 \
actor_rollout_ref.actor.optim.lr_warmup_steps=$LR_WARMUP_STEPS \
actor_rollout_ref.actor.ppo_mini_batch_size=${train_prompt_mini_bsz} \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=${train_traj_micro_bsz_per_gpu} \
actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=$ACTOR_PP \
@@ -117,6 +119,7 @@ for ENGINE in "${ENGINES[@]}"; do
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=${train_traj_micro_bsz_per_gpu} \
actor_rollout_ref.ref.megatron.param_offload=${REF_PARAM_OFFLOAD} \
critic.optim.lr=2e-5 \
critic.optim.lr_warmup_steps=$LR_WARMUP_STEPS \
critic.model.path="${MODEL_PATH}" \
critic.model.enable_gradient_checkpointing=False \
critic.ppo_micro_batch_size_per_gpu=${train_traj_micro_bsz_per_gpu} \
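LR_WARMUP_STEPS defaults to the literal string null, so when the environment variable is unset the override actor_rollout_ref.actor.optim.lr_warmup_steps=null reaches the config as None and the scheduler falls back to lr_warmup_steps_ratio. A minimal sketch of that parsing, assuming OmegaConf's usual dotlist/override handling (values are parsed as YAML, so "null" becomes None):

```python
from omegaconf import OmegaConf

# Dotlist values are parsed as YAML, so the shell default "null" becomes None,
# while an explicit LR_WARMUP_STEPS=1 arrives as the integer 1.
unset = OmegaConf.from_dotlist(["actor_rollout_ref.actor.optim.lr_warmup_steps=null"])
explicit = OmegaConf.from_dotlist(["actor_rollout_ref.actor.optim.lr_warmup_steps=1"])

print(unset.actor_rollout_ref.actor.optim.lr_warmup_steps)     # None -> delegate to the ratio
print(explicit.actor_rollout_ref.actor.optim.lr_warmup_steps)  # 1    -> used directly
```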
verl/trainer/config/ppo_megatron_trainer.yaml (4 changes: 2 additions & 2 deletions)
@@ -64,7 +64,7 @@ actor_rollout_ref:
clip_grad: 1.0
total_training_steps: -1 # must be overridden by the program
lr_warmup_init: 0.0 # initial learning rate for warmup, defaults to 0.0
lr_warmup_steps: -1 # Prioritized. Negative values mean delegating to lr_warmup_steps_ratio.
lr_warmup_steps: null # Prioritized. None, 0, or negative values delegate to lr_warmup_steps_ratio.
lr_warmup_steps_ratio: 0. # the total steps will be injected during runtime
lr_decay_steps: null
lr_decay_style: linear # select from constant/linear/cosine/inverse_square_root
@@ -181,7 +181,7 @@ critic:
clip_grad: 1.0
total_training_steps: -1 # must be overridden by the program
lr_warmup_init: 0.0 # initial learning rate for warmup, defaults to 0.0
lr_warmup_steps: -1 # Prioritized. Negative values mean delegating to lr_warmup_steps_ratio.
lr_warmup_steps: null # Prioritized. None, 0, or negative values delegate to lr_warmup_steps_ratio.
lr_warmup_steps_ratio: 0. # the total steps will be injected during runtime
lr_decay_steps: null
lr_decay_style: linear # select from constant/linear/cosine/inverse_square_root
verl/utils/megatron/optimizer.py (6 changes: 3 additions & 3 deletions)
@@ -43,11 +43,11 @@ def get_megatron_optimizer_param_scheduler(
Get the optimizer parameter scheduler for Megatron.
"""
if config.get("lr_decay_steps", None) is None:
config.lr_decay_steps = config.total_train_steps
config.lr_decay_steps = config.total_training_steps
wsd_decay_steps = None
if config.get("lr_wsd_decay_steps", None) is not None:
wsd_decay_steps = config.lr_wsd_decay_steps
if config.get("lr_warmup_steps_ratio", None) is not None:
if config.get("lr_warmup_steps_ratio", None) is not None and (config.get("lr_warmup_steps", None) is None or config.lr_warmup_steps <= 0):
config.lr_warmup_steps = int(config.lr_warmup_steps_ratio * config.lr_decay_steps)

opt_param_scheduler = OptimizerParamScheduler(
@@ -60,7 +60,7 @@ def get_megatron_optimizer_param_scheduler(
lr_decay_style=config.lr_decay_style,
start_wd=config.weight_decay,
end_wd=config.weight_decay,
wd_incr_steps=config.total_train_steps,
wd_incr_steps=config.total_training_steps,
wd_incr_style=config.weight_decay_incr_style,
use_checkpoint_opt_param_scheduler=config.use_checkpoint_opt_param_scheduler,
override_opt_param_scheduler=(not config.use_checkpoint_opt_param_scheduler),
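Net effect of the two fixes: lr_decay_steps and the weight-decay schedule now read total_training_steps (the field the YAML config actually defines), and an explicit positive lr_warmup_steps takes precedence while None, 0, or a negative value delegates to lr_warmup_steps_ratio. A standalone sketch of that resolution logic (illustrative, not the verl code itself):

```python
# Standalone sketch of the warmup-step resolution the patch implements:
# an explicit positive lr_warmup_steps wins; None, 0, or negative delegates to the ratio.
def resolve_warmup_steps(lr_warmup_steps, lr_warmup_steps_ratio, lr_decay_steps, total_training_steps):
    if lr_decay_steps is None:
        lr_decay_steps = total_training_steps
    if lr_warmup_steps is None or lr_warmup_steps <= 0:
        lr_warmup_steps = int(lr_warmup_steps_ratio * lr_decay_steps)
    return lr_warmup_steps, lr_decay_steps

# CI case: LR_WARMUP_STEPS=1, TOTAL_TRAIN_STEPS=2 -> the explicit value is kept.
print(resolve_warmup_steps(1, 0.0, None, 2))       # (1, 2)
# Default case: lr_warmup_steps is null -> fall back to the ratio.
print(resolve_warmup_steps(None, 0.1, None, 100))  # (10, 100)
```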