# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc

import torch
import torch.distributed as dist
from omegaconf import OmegaConf
from transformers import AutoConfig, AutoTokenizer

from verl import DataProto
from verl.utils.distributed import initialize_global_process_group
from verl.utils.model import compute_position_id_with_mask
from verl.workers.rollout.vllm_rollout.vllm_rollout_spmd import vLLMRollout


def test_vllm_rollout_with_yarn_position_embeddings():
    """
    Test the vLLM rollout with YaRN position embeddings.
    """

    local_rank, rank, world_size = initialize_global_process_group()
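    # prompt_length (35000) deliberately exceeds the model's 32768-token base context
    # window, so generation must go through the YaRN rope-scaling path; max_model_len
    # therefore has to cover the full prompt plus the generated response.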
    config = OmegaConf.create(
        {
            "model_path": "OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN",
            "prompt_length": 35000,
            "response_length": 512,
            "dtype": "bfloat16",
            "enforce_eager": True,
            "gpu_memory_utilization": 0.4,
            "enable_chunked_prefill": False,
            "free_cache_engine": False,
            "disable_log_stats": True,
            "max_model_len": 35000 + 512,
            "load_format": "auto",
            "val_kwargs": {
                "top_k": -1,
                "top_p": 1.0,
                "temperature": 0,
                "n": 1,
                "do_sample": False,
            },
            "tensor_model_parallel_size": 4,
            "trust_remote_code": True,
            "calculate_log_probs": False,
            "do_sample": False,
            "temperature": 0.0,
            "max_num_batched_tokens": 35000 + 512,
        }
    )

    tokenizer = AutoTokenizer.from_pretrained(config.model_path, trust_remote_code=True, padding_side="left")
    tokenizer.pad_token = tokenizer.eos_token
    model_hf_config = AutoConfig.from_pretrained(config.model_path)

    # do_sample=False with temperature=0 gives deterministic (greedy) decoding
    input_dataproto = prepare_input_dataproto(tokenizer, config, validate=True, do_sample=False)

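    # Build the rollout worker; under the hood this brings up a vLLM engine
    # sharded across tensor_model_parallel_size GPUs.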
    vllm_rollout = vLLMRollout(
        model_path=config.model_path,
        config=config,
        tokenizer=tokenizer,
        model_hf_config=model_hf_config,
    )
    # rollout
    rollout_response = vllm_rollout.generate_sequences(
        prompts=input_dataproto,
    )
    if rank == 0:
        print("vLLM Rollout Outputs:")
        print(tokenizer.batch_decode(rollout_response.batch["responses"], skip_special_tokens=False))
        # every response should terminate naturally with the end-of-turn token
        for response in rollout_response.batch["responses"]:
            assert "<|im_end|>" in tokenizer.decode(response, skip_special_tokens=False), (
                "Response should contain <|im_end|> token"
            )
        print("Checks passed.")

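    # Tear down the rollout engine and release GPU memory before destroying
    # the process group, so nothing leaks across tests.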
    del vllm_rollout
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    dist.barrier()
    dist.destroy_process_group()


def prepare_input_dataproto(tokenizer, config, validate, do_sample=False):
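    """
    Build a DataProto batch of three long chat prompts, each left-padded to
    config.prompt_length tokens, plus the sampling metadata the rollout expects.
    """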
    base_phrase = "Roses are red, sky is blue. " * 4096
    preencode_prompts = [
        # each prompt is ~32810 tokens, longer than the 32768-token base context window
        [{"role": "user", "content": base_phrase + "Who won the Champions League in 2019?"}],
        [{"role": "user", "content": base_phrase + "The founder of Apple is"}],
        [{"role": "user", "content": base_phrase + "What's your name"}],
    ]
    formatted_prompts = [
        tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
        for conversation in preencode_prompts
    ]
    prompts = tokenizer(formatted_prompts, return_tensors="pt", padding="max_length", max_length=config.prompt_length)
    input_dataproto = DataProto.from_dict(
        {
            "input_ids": prompts["input_ids"],
            "attention_mask": prompts["attention_mask"],
            "position_ids": compute_position_id_with_mask(prompts["attention_mask"]),
        },
        meta_info={
            "bos_token_id": tokenizer.bos_token_id,
            "eos_token_id": tokenizer.eos_token_id,
            "pad_token_id": tokenizer.pad_token_id,
            "validate": validate,
            "do_sample": do_sample,
            "response_length": config.response_length,
            "temperature": config.temperature,
        },
    )
    return input_dataproto


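# The config above sets tensor_model_parallel_size=4, so this script needs 4 GPUs
# with one process per GPU, e.g. via torchrun (the script name here is illustrative):
#   torchrun --standalone --nproc_per_node=4 test_vllm_rollout_yarn.py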
if __name__ == "__main__":
    test_vllm_rollout_with_yarn_position_embeddings()