Merge branch 'main' into main

verl-project · zhaochenyang20 · Jul 9, 2025 · Jul 1, 2025 · Jul 1, 2025 · Jul 1, 2025
commit 171c9be0a4a7a32063e4af1e784f4c30e96e7f7d
diff --git a/verl/trainer/config/ppo_trainer.yaml b/verl/trainer/config/ppo_trainer.yaml
@@ -19,123 +19,13 @@ defaults:
   # Reference model will be enabled when actor.use_kl_loss or/and algorithm.use_kl_in_reward is/are True.
   - ref@actor_rollout_ref.ref: dp_ref
 
+
   # Rollout model config.
   - rollout@actor_rollout_ref.rollout: rollout
 
   # self config override anything above
   - _self_
 
-# dataset config
-data:
-
-  # Tokenizer class or path. If null, it will be inferred from the model.
-  tokenizer: null
-
-  # Whether to use shared memory for data loading.
-  use_shm: False
-
-  # Training set parquet. Can be a list or a single file.
-  # The program will read all files into memory, so it can't be too large (< 100GB).
-  # The path can be either a local path or an HDFS path.
-  # For HDFS path, we provide utils to download it to DRAM and convert it to a local path.
-  train_files: ~/data/rlhf/gsm8k/train.parquet
-
-  # Validation parquet. Can be a list or a single file.
-  val_files: ~/data/rlhf/gsm8k/test.parquet
-
-  # The field in the dataset where the prompt is located. Default is 'prompt'.
-  prompt_key: prompt
-
-  # The field used to select the reward function (if using different ones per example).
-  reward_fn_key: data_source
-
-  # Maximum prompt length. All prompts will be left-padded to this length.
-  # An error will be reported if the length is too long.
-  max_prompt_length: 512
-
-  # Maximum response length. Rollout in RL algorithms (e.g. PPO) generates up to this length.
-  max_response_length: 512
-
-  # Batch size sampled for one training iteration of different RL algorithms.
-  train_batch_size: 1024
-
-  # Batch size used during validation. Can be null.
-  val_batch_size: null
-
-  # Whether to return the original input_ids without adding chat template.
-  # This is used when the reward model's chat template differs from the policy.
-  # If using a model-based RM with different templates, this should be True.
-  return_raw_input_ids: False
-
-  # Whether to return the original chat (prompt) without applying chat template.
-  return_raw_chat: False
-
-  # Whether to return the full prompt with chat template.
-  return_full_prompt: False
-
-  # Whether to shuffle the data in the dataloader.
-  shuffle: True
-
-  # num dataloader workers
-  dataloader_num_workers: 8
-
-  # Whether to shuffle the validation set.
-  validation_shuffle: False
-
-  # Whether to filter overlong prompts.
-  filter_overlong_prompts: False
-
-  # Number of workers for filtering overlong prompts.
-  # For large-scale datasets, filtering can be time-consuming.
-  # Use multiprocessing to speed up. Default is 1.
-  filter_overlong_prompts_workers: 1
-
-  # Truncate the input_ids or prompt if they exceed max_prompt_length.
-  # Options: 'error', 'left', or 'right'. Default is 'error'.
-  truncation: error
-
-  # The field in the multi-modal dataset where the image is located. Default is 'images'.
-  image_key: images
-
-  # The field in the multi-modal dataset where the video is located.
-  video_key: videos
-
-  # If the remote tokenizer has a Python file, this flag determines whether to allow using it.
-  trust_remote_code: False
-
-  # Optional: specify a custom dataset class path and name if overriding default loading behavior.
-  custom_cls:
-
-    # The path to the file containing your customized dataset class. If not specified, pre-implemented dataset will be used.
-    path: null
-
-    # The name of the dataset class within the specified file.
-    name: null
-
-  # Whether to return multi-modal inputs in the dataset. Set to False if rollout generates new multi-modal inputs.
-  return_multi_modal_inputs: True
-
-  # Data generation configuration for augmenting the dataset.
-  datagen:
-
-    # The path to the file containing your customized data generation class.
-    # E.g. 'pkg://verl.experimental.dynamic_dataset.dynamicgen_dataset'
-    path: null
-
-    # The class name of the data generation class within the specified file.
-    # E.g. 'MockDataGenerator'
-    name: null
-
-  # settings related to data sampler
-  sampler:
-
-    # the path to the module containing a curriculum class which implements the
-    # AbstractSampler interface
-    class_path: null
-
-    # the name of the curriculum class like `MySampler`
-    class_name: null
-
 # config for actor, rollout and reference model
 actor_rollout_ref: