Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
4a4dbc0
add support for custom datagen class that allows for adding new data …
jwong8314 Jul 1, 2025
9d599ce
ruff
jwong8314 Jul 1, 2025
d91b626
ruff-format
jwong8314 Jul 1, 2025
84b3815
ruff-format
jwong8314 Jul 1, 2025
3c1cf80
Update license
jwong8314 Jul 1, 2025
9c65168
update license
jwong8314 Jul 1, 2025
8a04aca
fix: make sure if there's not data_generatore it doesn't crash
jwong8314 Jul 1, 2025
87b89d0
ruff-format
jwong8314 Jul 1, 2025
732b184
Merge branch 'main' into main
zhaochenyang20 Jul 2, 2025
ffba50d
Merge branch 'main' into dynamic_dataset
jwong8314 Jul 4, 2025
c620bcb
undo change to import_utils
jwong8314 Jul 4, 2025
6b061f9
merging into dataset
jwong8314 Jul 4, 2025
13debde
Merge pull request #1 from jwong8314/dynamic_dataset
jwong8314 Jul 4, 2025
19201e2
rename variables
jwong8314 Jul 4, 2025
72b223e
is_train rename
jwong8314 Jul 4, 2025
383cf61
Merge pull request #2 from jwong8314/dynamic_dataset
jwong8314 Jul 4, 2025
5070088
rename
jwong8314 Jul 4, 2025
3250d1d
rename to Generator
jwong8314 Jul 4, 2025
8c48f09
Merge pull request #3 from jwong8314/dynamic_dataset
jwong8314 Jul 4, 2025
0a5cabf
Merge branch 'main' into main
zhaochenyang20 Jul 4, 2025
4aac878
add parameter for batch information
jwong8314 Jul 8, 2025
e3bbd57
add comments and placed files in experimental
jwong8314 Jul 8, 2025
122e817
move to experimental subdir
jwong8314 Jul 8, 2025
a74ff75
ruff
jwong8314 Jul 8, 2025
2e44ead
ruff
jwong8314 Jul 8, 2025
6126b96
Merge branch 'volcengine:main' into main
jwong8314 Jul 8, 2025
16647d4
patch CI
jwong8314 Jul 8, 2025
171c9be
Merge branch 'main' into main
jwong8314 Jul 9, 2025
314d350
resolve conflicts new yaml
jwong8314 Jul 9, 2025
4bd1452
typo
jwong8314 Jul 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
is_train rename
  • Loading branch information
jwong8314 committed Jul 4, 2025
commit 72b223eab9bdf27d6b8dd52ca78f155994a62d7c
10 changes: 5 additions & 5 deletions verl/trainer/main_ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,8 @@ def run(self, config):
from verl.utils.dataset.rl_dataset import collate_fn

# Create training and validation datasets.
train_dataset = create_rl_dataset(config.data.train_files, config.data, tokenizer, processor, train=True)
val_dataset = create_rl_dataset(config.data.val_files, config.data, tokenizer, processor, train=False)
train_dataset = create_rl_dataset(config.data.train_files, config.data, tokenizer, processor, is_train=True)
val_dataset = create_rl_dataset(config.data.val_files, config.data, tokenizer, processor, is_train=False)
train_sampler = create_rl_sampler(config.data, train_dataset)

# Initialize the PPO trainer.
Expand All @@ -214,7 +214,7 @@ def run(self, config):
trainer.fit()


def create_rl_dataset(data_paths, data_config, tokenizer, processor, train=True):
def create_rl_dataset(data_paths, data_config, tokenizer, processor, is_train=True):
"""Create a dataset.

Arguments:
Expand Down Expand Up @@ -243,8 +243,8 @@ def create_rl_dataset(data_paths, data_config, tokenizer, processor, train=True)
f"The custom dataset class '{data_config.custom_cls.name}' from "
f"'{data_config.custom_cls.path}' must inherit from torch.utils.data.Dataset"
)
elif "datagen" in data_config and data_config.datagen.get("path", None) is not None and train:
# If a data generation strategy is specified, use the DataGenDataset class
elif "datagen" in data_config and data_config.datagen.get("path", None) is not None and is_train:
# If a data generation strategy is specified, use the DynamicGenDataset class
from verl.utils.dataset.dynamicgen_dataset import DynamicGenDataset

dataset_cls = DynamicGenDataset
Expand Down