[trainer] fix: reproducible problem when resume training #4156
Changes from 5 commits: d8721e9, 555e36f, f8a1bdc, ccff31a, a1658ed, 59dd68e
```diff
@@ -975,6 +975,11 @@ def fit(self):
         # load checkpoint before doing anything
         self._load_checkpoint()
+        # resume sampler state if needed
+        current_epoch = self.global_steps // len(self.train_dataloader)
+        for _ in range(current_epoch - 1):
+            for _ in iter(self.train_dataloader.sampler):
+                pass

         # perform validation before training
         # currently, we only support validation using the reward_function.

@@ -1006,7 +1011,7 @@ def fit(self):
         )
         next_step_profile = False

-        for epoch in range(self.config.trainer.total_epochs):
+        for epoch in range(current_epoch, self.config.trainer.total_epochs):
```
Collaborator:

From https://docs.pytorch.org/docs/stable/data.html#torch.utils.data.distributed.DistributedSampler:

> In distributed mode, calling the `set_epoch()` method at the beginning of each epoch before creating the `DataLoader` iterator is necessary to make shuffling work properly across multiple epochs. Otherwise, the same ordering will always be used.

I think we are missing a call to `set_epoch()` here.
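For reference, the docs' pattern looks like this (a minimal sketch with placeholder dataset and epoch bounds; `DistributedSampler` also assumes an initialized process group, omitted here):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

dataset = TensorDataset(torch.arange(100))
sampler = DistributedSampler(dataset)  # assumes torch.distributed is initialized
loader = DataLoader(dataset, batch_size=8, sampler=sampler)

start_epoch, total_epochs = 2, 5  # hypothetical resume point
for epoch in range(start_epoch, total_epochs):
    # set_epoch() reseeds the shuffle deterministically per epoch, so a
    # resumed run reproduces the original ordering in O(1), with no need
    # to replay earlier epochs through the sampler
    sampler.set_epoch(epoch)
    for (batch,) in loader:
        pass
```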
Contributor (Author):

It works only when we use `DistributedSampler`.
Collaborator:

No, it works for both.
Contributor (Author):

You could see the code of `DistributedSampler.set_epoch`. But in verl code, `create_rl_sampler` builds the `RandomSampler` with a seeded `generator`, so `set_epoch` does not apply.
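A sketch of what the author is describing, assuming the `create_rl_sampler` construction above (all other names are illustrative):

```python
import torch
from torch.utils.data import RandomSampler, TensorDataset

dataset = TensorDataset(torch.arange(100))

# verl-style construction as described: a RandomSampler driven by a
# seeded generator (config plumbing omitted)
gen = torch.Generator()
gen.manual_seed(1)
sampler = RandomSampler(data_source=dataset, generator=gen)

# RandomSampler has no set_epoch(); each full pass draws a fresh
# permutation from the generator's advancing state, so skipping epochs
# means consuming whole passes of indices
skipped_epochs = 2  # hypothetical resume point
for _ in range(skipped_epochs):
    for _ in iter(sampler):
        pass
```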
```diff
             for batch_dict in self.train_dataloader:
                 metrics = {}
                 timing_raw = {}
```
Reviewer (gemini):

This approach to restoring the sampler state is very inefficient and seems to have a correctness issue.

Correctness: the loop `for _ in range(current_epoch - 1):` appears to have an off-by-one error. If training resumes at a `global_step` corresponding to the start of `current_epoch = 1`, this loop (`range(0)`) will not execute. The main training loop will then start at epoch 1, but the sampler will still be in its state for epoch 0. This would lead to incorrect data sampling and break the reproducibility you're trying to fix. To correctly advance the sampler to be ready for `current_epoch`, the loop should run `current_epoch` times.

Inefficiency: the nested loop iterates through all the samples in the dataset for each epoch you want to skip. For large datasets, this can add a significant delay to your training startup time.

A more robust and efficient solution would be to use the `set_epoch()` method if the sampler supports it (which is standard for `torch.utils.data.distributed.DistributedSampler`). This avoids iterating through the dataset entirely. I suggest replacing this block with a more efficient and correct implementation that handles both points.
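A hedged sketch of what the suggested replacement could look like (not the PR's final code; it assumes `self.global_steps` and `self.train_dataloader` as in the diff above):

```python
# advance the sampler to current_epoch on resume
current_epoch = self.global_steps // len(self.train_dataloader)
sampler = self.train_dataloader.sampler
if hasattr(sampler, "set_epoch"):
    # DistributedSampler path: O(1) deterministic reseed
    sampler.set_epoch(current_epoch)
else:
    # generator-driven RandomSampler path: consume the skipped epochs'
    # indices so the generator state matches current_epoch
    for _ in range(current_epoch):
        for _ in iter(sampler):
            pass
```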
Please address the gemini review; `set_epoch` is more efficient.
The sampler appears to be one epoch ahead after resuming from `_load_checkpoint` (e.g., it resumes at step 3 instead of step 1). As a workaround, we only advance the sampler `current_epoch - 1` times.
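A worked example of the arithmetic behind the workaround (hypothetical numbers, not from the PR):

```python
steps_per_epoch = 10                              # len(self.train_dataloader)
global_steps = 25                                 # restored by _load_checkpoint
current_epoch = global_steps // steps_per_epoch   # -> 2

# If _load_checkpoint already leaves the sampler one epoch ahead (the
# observation above), advancing it current_epoch more times would
# overshoot by one, hence range(current_epoch - 1).
```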
For the correctness problem, see the comment below.
It's `torch.utils.data.RandomSampler`'s duty to skip the samples consumed in this epoch so far; there's no need to skip them manually:
https://github.com/meta-pytorch/data/blob/main/torchdata/stateful_dataloader/sampler.py#L73-L74
Sorry for missing this comment again (flaky network). We used `torch.utils.data.sampler.RandomSampler` rather than `torchdata.stateful_dataloader.sampler.RandomSampler`. I am trying the second one; it seems it will fit `StatefulDataLoader`.
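If that works out, the swap might look roughly like this (a sketch, assuming the import paths from the comments above and mirroring the seeded-generator construction from `create_rl_sampler`):

```python
import torch
from torch.utils.data import TensorDataset
from torchdata.stateful_dataloader import StatefulDataLoader
from torchdata.stateful_dataloader.sampler import RandomSampler  # stateful variant

dataset = TensorDataset(torch.arange(100))

gen = torch.Generator()
gen.manual_seed(1)
sampler = RandomSampler(dataset, generator=gen)  # same signature as the torch one

loader = StatefulDataLoader(dataset, sampler=sampler, batch_size=8)
# loader.state_dict() / load_state_dict() now round-trip the sampler's
# in-epoch position, so resume no longer needs manual fast-forwarding
```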