fix error and add CI
ETOgaosion committed May 27, 2025
commit 5a8612df0014752cd666f4f72d52a2a67b9648c0
.github/workflows/e2e_ppo_trainer_megatron.yml (6 changes: 5 additions & 1 deletion)
@@ -112,6 +112,11 @@ jobs:
run: |
ray stop --force
RESUME_MODE=auto MODEL_ID=Qwen/Qwen3-0.6B bash tests/e2e/run_ppo_trainer_megatron.sh
- name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3), testing the learning rate scheduler
run: |
ray stop --force
LR_WARMUP_STEPS=1 TOTAL_TRAIN_STEPS=2 MODEL_ID=Qwen/Qwen3-0.6B bash tests/e2e/run_ppo_trainer_megatron.sh

- name: Test Megatron checkpoints merging function (Qwen3 Actor and Critic)
run: |
exp_name="qwen3-0.6b-megatron-gsm8k-minimal"
@@ -230,4 +235,3 @@ jobs:
- name: clean up
run: |
rm -rf checkpoints

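The new step pins LR_WARMUP_STEPS=1 and TOTAL_TRAIN_STEPS=2, so the warmup phase and the decay phase each cover a single optimizer step. Below is a rough sketch of the schedule shape this exercises; it is illustrative only (linear decay, min_lr of 0, and the exact step accounting are assumptions, not Megatron's OptimizerParamScheduler).

```python
# Illustrative only: the schedule shape the CI settings above should exercise,
# assuming linear warmup from lr_warmup_init to lr and linear decay to min_lr = 0.
def sketched_lr(step, lr=1e-6, warmup_steps=1, total_steps=2, warmup_init=0.0, min_lr=0.0):
    if step < warmup_steps:  # warmup phase
        return warmup_init + (lr - warmup_init) * (step + 1) / warmup_steps
    frac = (step + 1 - warmup_steps) / max(total_steps - warmup_steps, 1)
    return lr - (lr - min_lr) * frac  # linear decay phase

for s in range(2):
    print(f"step {s}: lr = {sketched_lr(s):.1e}")  # step 0: 1.0e-06, step 1: 0.0e+00
```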
tests/e2e/run_ppo_trainer_megatron.sh (5 changes: 4 additions & 1 deletion)
@@ -69,6 +69,8 @@ CRITIC_GRAD_OFFLOAD=${CRITIC_GRAD_OFFLOAD:-$COMMON_GRAD_OFFLOAD}
CRITIC_OPTIMIZER_OFFLOAD=${CRITIC_OPTIMIZER_OFFLOAD:-$COMMON_OPTIMIZER_OFFLOAD}
RM_PARAM_OFFLOAD=${RM_PARAM_OFFLOAD:-$COMMON_PARAM_OFFLOAD}

LR_WARMUP_STEPS=${LR_WARMUP_STEPS:-null}

CHECKPOINT_CONTENTS=['model','hf_model','optimizer','extra']
SKIP_SAVE_HF_MODEL=${SKIP_SAVE_HF_MODEL:-0}
if [ $SKIP_SAVE_HF_MODEL -eq 1 ]; then
@@ -91,7 +93,7 @@ for ENGINE in "${ENGINES[@]}"; do
data.filter_overlong_prompts=True \
data.truncation='error' \
actor_rollout_ref.model.path="${MODEL_PATH}" \
actor_rollout_ref.actor.optim.lr=1e-6 \
actor_rollout_ref.actor.optim.lr_warmup_steps=$LR_WARMUP_STEPS \
actor_rollout_ref.actor.ppo_mini_batch_size=${train_prompt_mini_bsz} \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=${train_traj_micro_bsz_per_gpu} \
actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=$ACTOR_PP \
@@ -117,6 +119,7 @@ for ENGINE in "${ENGINES[@]}"; do
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=${train_traj_micro_bsz_per_gpu} \
actor_rollout_ref.ref.megatron.param_offload=${REF_PARAM_OFFLOAD} \
critic.optim.lr=2e-5 \
critic.optim.lr_warmup_steps=$LR_WARMUP_STEPS \
critic.model.path="${MODEL_PATH}" \
critic.model.enable_gradient_checkpointing=False \
critic.ppo_micro_batch_size_per_gpu=${train_traj_micro_bsz_per_gpu} \
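LR_WARMUP_STEPS defaults to the literal string null, so when the environment variable is unset the override actor_rollout_ref.actor.optim.lr_warmup_steps=null reaches the config as None and the scheduler falls back to lr_warmup_steps_ratio. A minimal sketch of that parsing, assuming OmegaConf's usual dotlist/override handling (values are parsed as YAML, so "null" becomes None):

```python
from omegaconf import OmegaConf

# Dotlist values are parsed as YAML, so the shell default "null" becomes None,
# while an explicit LR_WARMUP_STEPS=1 arrives as the integer 1.
unset = OmegaConf.from_dotlist(["actor_rollout_ref.actor.optim.lr_warmup_steps=null"])
explicit = OmegaConf.from_dotlist(["actor_rollout_ref.actor.optim.lr_warmup_steps=1"])

print(unset.actor_rollout_ref.actor.optim.lr_warmup_steps)     # None -> delegate to the ratio
print(explicit.actor_rollout_ref.actor.optim.lr_warmup_steps)  # 1    -> used directly
```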
verl/trainer/config/ppo_megatron_trainer.yaml (4 changes: 2 additions & 2 deletions)
@@ -64,7 +64,7 @@ actor_rollout_ref:
clip_grad: 1.0
total_training_steps: -1 # must be overridden by the program
lr_warmup_init: 0.0 # initial learning rate for warmup, defaults to 0.0
lr_warmup_steps: -1 # Prioritized. Negative values mean delegating to lr_warmup_steps_ratio.
lr_warmup_steps: null # Prioritized. None, 0, or negative values delegate to lr_warmup_steps_ratio.
lr_warmup_steps_ratio: 0. # the total steps will be injected during runtime
lr_decay_steps: null
lr_decay_style: linear # select from constant/linear/cosine/inverse_square_root
@@ -181,7 +181,7 @@ critic:
clip_grad: 1.0
total_training_steps: -1 # must be overridden by the program
lr_warmup_init: 0.0 # initial learning rate for warmup, defaults to 0.0
lr_warmup_steps: -1 # Prioritized. Negative values mean delegating to lr_warmup_steps_ratio.
lr_warmup_steps: null # Prioritized. None, 0, or negative values delegate to lr_warmup_steps_ratio.
lr_warmup_steps_ratio: 0. # the total steps will be injected during runtime
lr_decay_steps: null
lr_decay_style: linear # select from constant/linear/cosine/inverse_square_root
verl/utils/megatron/optimizer.py (6 changes: 3 additions & 3 deletions)
@@ -43,11 +43,11 @@ def get_megatron_optimizer_param_scheduler(
Get the optimizer parameter scheduler for Megatron.
"""
if config.get("lr_decay_steps", None) is None:
config.lr_decay_steps = config.total_train_steps
config.lr_decay_steps = config.total_training_steps
wsd_decay_steps = None
if config.get("lr_wsd_decay_steps", None) is not None:
wsd_decay_steps = config.lr_wsd_decay_steps
if config.get("lr_warmup_steps_ratio", None) is not None:
if config.get("lr_warmup_steps_ratio", None) is not None and (config.get("lr_warmup_steps", None) is None or config.lr_warmup_steps <= 0):
config.lr_warmup_steps = int(config.lr_warmup_steps_ratio * config.lr_decay_steps)

opt_param_scheduler = OptimizerParamScheduler(
@@ -60,7 +60,7 @@ def get_megatron_optimizer_param_scheduler(
lr_decay_style=config.lr_decay_style,
start_wd=config.weight_decay,
end_wd=config.weight_decay,
wd_incr_steps=config.total_train_steps,
wd_incr_steps=config.total_training_steps,
wd_incr_style=config.weight_decay_incr_style,
use_checkpoint_opt_param_scheduler=config.use_checkpoint_opt_param_scheduler,
override_opt_param_scheduler=(not config.use_checkpoint_opt_param_scheduler),
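Net effect of the two fixes: lr_decay_steps and the weight-decay schedule now read total_training_steps (the field the YAML config actually defines), and an explicit positive lr_warmup_steps takes precedence while None, 0, or a negative value delegates to lr_warmup_steps_ratio. A standalone sketch of that resolution logic (illustrative, not the verl code itself):

```python
# Standalone sketch of the warmup-step resolution the patch implements:
# an explicit positive lr_warmup_steps wins; None, 0, or negative delegates to the ratio.
def resolve_warmup_steps(lr_warmup_steps, lr_warmup_steps_ratio, lr_decay_steps, total_training_steps):
    if lr_decay_steps is None:
        lr_decay_steps = total_training_steps
    if lr_warmup_steps is None or lr_warmup_steps <= 0:
        lr_warmup_steps = int(lr_warmup_steps_ratio * lr_decay_steps)
    return lr_warmup_steps, lr_decay_steps

# CI case: LR_WARMUP_STEPS=1, TOTAL_TRAIN_STEPS=2 -> the explicit value is kept.
print(resolve_warmup_steps(1, 0.0, None, 2))       # (1, 2)
# Default case: lr_warmup_steps is null -> fall back to the ratio.
print(resolve_warmup_steps(None, 0.1, None, 100))  # (10, 100)
```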