Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
4a4dbc0
add support for custom datagen class that allows for adding new data …
jwong8314 Jul 1, 2025
9d599ce
ruff
jwong8314 Jul 1, 2025
d91b626
ruff-format
jwong8314 Jul 1, 2025
84b3815
ruff-format
jwong8314 Jul 1, 2025
3c1cf80
Update license
jwong8314 Jul 1, 2025
9c65168
update license
jwong8314 Jul 1, 2025
8a04aca
fix: make sure it doesn't crash if there's no data_generator
jwong8314 Jul 1, 2025
87b89d0
ruff-format
jwong8314 Jul 1, 2025
732b184
Merge branch 'main' into main
zhaochenyang20 Jul 2, 2025
ffba50d
Merge branch 'main' into dynamic_dataset
jwong8314 Jul 4, 2025
c620bcb
undo change to import_utils
jwong8314 Jul 4, 2025
6b061f9
merging into dataset
jwong8314 Jul 4, 2025
13debde
Merge pull request #1 from jwong8314/dynamic_dataset
jwong8314 Jul 4, 2025
19201e2
rename variables
jwong8314 Jul 4, 2025
72b223e
is_train rename
jwong8314 Jul 4, 2025
383cf61
Merge pull request #2 from jwong8314/dynamic_dataset
jwong8314 Jul 4, 2025
5070088
rename
jwong8314 Jul 4, 2025
3250d1d
rename to Generator
jwong8314 Jul 4, 2025
8c48f09
Merge pull request #3 from jwong8314/dynamic_dataset
jwong8314 Jul 4, 2025
0a5cabf
Merge branch 'main' into main
zhaochenyang20 Jul 4, 2025
4aac878
add parameter for batch information
jwong8314 Jul 8, 2025
e3bbd57
add comments and placed files in experimental
jwong8314 Jul 8, 2025
122e817
move to experimental subdir
jwong8314 Jul 8, 2025
a74ff75
ruff
jwong8314 Jul 8, 2025
2e44ead
ruff
jwong8314 Jul 8, 2025
6126b96
Merge branch 'volcengine:main' into main
jwong8314 Jul 8, 2025
16647d4
patch CI
jwong8314 Jul 8, 2025
171c9be
Merge branch 'main' into main
jwong8314 Jul 9, 2025
314d350
resolve conflicts new yaml
jwong8314 Jul 9, 2025
4bd1452
typo
jwong8314 Jul 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge branch 'main' into main
  • Loading branch information
jwong8314 authored Jul 9, 2025
commit 171c9be0a4a7a32063e4af1e784f4c30e96e7f7d
112 changes: 1 addition & 111 deletions verl/trainer/config/ppo_trainer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,123 +19,13 @@ defaults:
# Reference model will be enabled when actor.use_kl_loss or/and algorithm.use_kl_in_reward is/are True.
- ref@actor_rollout_ref.ref: dp_ref


# Rollout model config.
- rollout@actor_rollout_ref.rollout: rollout

# Settings in this file (_self_) override anything listed above.
- _self_

# Dataset config.
data:

  # Tokenizer class or path. If null, it will be inferred from the model.
  tokenizer: null

  # Whether to use shared memory for data loading.
  use_shm: False

  # Training set parquet. Can be a list or a single file.
  # The program will read all files into memory, so it can't be too large
  # (< 100GB).
  # The path can be either a local path or an HDFS path.
  # For an HDFS path, we provide utils to download it to DRAM and convert it
  # to a local path.
  train_files: ~/data/rlhf/gsm8k/train.parquet

  # Validation parquet. Can be a list or a single file.
  val_files: ~/data/rlhf/gsm8k/test.parquet

  # The field in the dataset where the prompt is located. Default is 'prompt'.
  prompt_key: prompt

  # The field used to select the reward function (if using different ones
  # per example).
  reward_fn_key: data_source

  # Maximum prompt length. All prompts will be left-padded to this length.
  # An error will be reported if the length is too long.
  max_prompt_length: 512

  # Maximum response length. Rollout in RL algorithms (e.g. PPO) generates
  # up to this length.
  max_response_length: 512

  # Batch size sampled for one training iteration of different RL algorithms.
  train_batch_size: 1024

  # Batch size used during validation. Can be null.
  val_batch_size: null

  # Whether to return the original input_ids without adding chat template.
  # This is used when the reward model's chat template differs from the
  # policy. If using a model-based RM with different templates, this should
  # be True.
  return_raw_input_ids: False

  # Whether to return the original chat (prompt) without applying chat
  # template.
  return_raw_chat: False

  # Whether to return the full prompt with chat template.
  return_full_prompt: False

  # Whether to shuffle the data in the dataloader.
  shuffle: True

  # Number of dataloader workers.
  dataloader_num_workers: 8

  # Whether to shuffle the validation set.
  validation_shuffle: False

  # Whether to filter overlong prompts.
  filter_overlong_prompts: False

  # Number of workers for filtering overlong prompts.
  # For large-scale datasets, filtering can be time-consuming.
  # Use multiprocessing to speed up. Default is 1.
  filter_overlong_prompts_workers: 1

  # Truncate the input_ids or prompt if they exceed max_prompt_length.
  # Options: 'error', 'left', or 'right'. Default is 'error'.
  truncation: error

  # The field in the multi-modal dataset where the image is located.
  # Default is 'images'.
  image_key: images

  # The field in the multi-modal dataset where the video is located.
  video_key: videos

  # If the remote tokenizer has a Python file, this flag determines whether
  # to allow using it.
  trust_remote_code: False

  # Optional: specify a custom dataset class path and name if overriding
  # default loading behavior.
  custom_cls:

    # The path to the file containing your customized dataset class.
    # If not specified, pre-implemented dataset will be used.
    path: null

    # The name of the dataset class within the specified file.
    name: null

  # Whether to return multi-modal inputs in the dataset. Set to False if
  # rollout generates new multi-modal inputs.
  return_multi_modal_inputs: True

  # Data generation configuration for augmenting the dataset.
  datagen:

    # The path to the file containing your customized data generation class.
    # E.g. 'pkg://verl.experimental.dynamic_dataset.dynamicgen_dataset'
    path: null

    # The class name of the data generation class within the specified file.
    # E.g. 'MockDataGenerator'
    name: null

  # Settings related to the data sampler.
  sampler:

    # The path to the module containing a curriculum class which implements
    # the AbstractSampler interface.
    class_path: null

    # The name of the curriculum class, e.g. `MySampler`.
    class_name: null

# config for actor, rollout and reference model
actor_rollout_ref:

Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.