Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
7c9e41d
fix(rollout_corr): compute metrics in actor for bypass mode and fix t…
szrlee Nov 10, 2025
96ae2be
docs(rollout_corr): move to algo/ and add pure_rs preset
szrlee Nov 10, 2025
c0ea9bd
feat(rollout_corr): add batch normalization option for IS weights
szrlee Nov 10, 2025
7de6c5f
docs(rollout_corr_math): use REINFORCE in aggregation loss examples f…
szrlee Nov 10, 2025
2b34cfe
refactor(rollout_corr): simplify metrics computation by removing unus…
szrlee Nov 10, 2025
0c42f85
docs(rollout_corr): add prominent cross-references between usage and …
szrlee Nov 10, 2025
fef8a48
docs(rollout_corr_math): add dedicated section for batch normalization
szrlee Nov 10, 2025
08cc9c7
fix: docstring of compute_policy_loss_with_rollout_correction
tongyx361 Nov 11, 2025
437a4ab
feat: reuse need_recomputation instead of bypass_mode
tongyx361 Nov 11, 2025
5f9a53b
feat: improve comments
tongyx361 Nov 11, 2025
b2f6370
feat: improve comments
tongyx361 Nov 11, 2025
79cdbf2
feat: refactor bypass_recomputing_logprobs
tongyx361 Nov 11, 2025
62e3270
feat(rollout_corr): align batch normalization with IS aggregation level
szrlee Nov 11, 2025
b5c19ff
docs(rollout_corr): rename decoupled mode presets for clarity and upd…
szrlee Nov 11, 2025
11f9aa0
fix(rollout_corr): correct metrics computation to run in decoupled mo…
szrlee Nov 11, 2025
58565cb
docs(rollout_corr): rename presets for clarity and consistency
szrlee Nov 11, 2025
8bb1a0e
refactor(rollout_corr): rename config vars for semantic clarity
szrlee Nov 11, 2025
6002c00
refactor(rollout_corr): update implementation to use renamed config v…
szrlee Nov 11, 2025
7f9ba9c
Merge branch 'main' into pr/szrlee/4070
tongyx361 Nov 11, 2025
56f69bf
fix: ppo_trainer config format
tongyx361 Nov 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: improve comments
  • Loading branch information
tongyx361 committed Nov 11, 2025
commit 5f9a53bfab887589ef4bfa4af249272e58f62f39
29 changes: 13 additions & 16 deletions verl/workers/actor/dp_actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,23 +462,20 @@ def update_policy(self, data: DataProto):
)
micro_batch_metrics.update(pg_metrics)

# Compute rollout_corr metrics during training (for monitoring drift)
# This computes metrics using CURRENT policy log_prob vs rollout_log_prob
# to track off-policy gap as training progresses (different from trainer metrics
# which use old_log_prob and only show gap at start of training)
# Skip if using pure rollout correction mode (metrics already in pg_metrics)
# Only computed in bypass mode where rollout_log_probs are available
if loss_mode != "rollout_correction":
rollout_log_prob = model_inputs.get("rollout_log_probs", None)
if rollout_log_prob is not None:
from verl.trainer.ppo.rollout_corr_helper import compute_rollout_corr_metrics_from_logprobs

rollout_corr_metrics = compute_rollout_corr_metrics_from_logprobs(
log_prob=log_prob,
rollout_log_prob=rollout_log_prob,
response_mask=response_mask,
)
micro_batch_metrics.update(rollout_corr_metrics)
rollout_log_prob = model_inputs.get("rollout_log_probs", None)
if loss_mode != "rollout_correction" and rollout_log_prob is not None:
# Compute metrics from the CURRENT policy log_prob vs. rollout_log_prob
# to track the off-policy gap as training progresses (unlike the trainer metrics,
# which use old_log_prob and only show the gap at the start of training)
from verl.trainer.ppo.rollout_corr_helper import compute_rollout_corr_metrics_from_logprobs

rollout_corr_metrics = compute_rollout_corr_metrics_from_logprobs(
log_prob=log_prob,
rollout_log_prob=rollout_log_prob,
response_mask=response_mask,
)
micro_batch_metrics.update(rollout_corr_metrics)

if entropy_coeff != 0:
entropy_loss = agg_loss(loss_mat=entropy, loss_mask=response_mask, loss_agg_mode=loss_agg_mode)
Expand Down
Loading