Make _recompute handle lists instead of tuples

Signed-off-by: Jaime Cardenas <[email protected]>
NVIDIA · jaimec00 · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025 · Oct 28, 2025
commit f0670ed159f7bb88fc5644fdb593ebd32d7411e8
diff --git a/transformer_engine/pytorch/module/layernorm_mlp.py b/transformer_engine/pytorch/module/layernorm_mlp.py
@@ -439,7 +439,7 @@ def _forward(
             # FP8 cast to workspace buffer
             update_workspace = (
                 is_first_microbatch is None or is_first_microbatch
-            ) and not is_recomputation  # only update workspace if not checkpointing or checkpointing with no recomp, otherwise cache workspace
+            )  # NOTE(review): "and not is_recomputation" is disabled here, so the workspace may also be updated during recomputation rather than only when not recomputing -- confirm this is intended
             fc1_weight_quantizer.set_usage(rowwise=True, columnwise=is_grad_enabled)
             fc2_weight_quantizer.set_usage(rowwise=True, columnwise=is_grad_enabled)
             fc1_weight_final = module.get_weight_workspace(
@@ -790,7 +790,7 @@ def _forward(
 
             ctx.wgrad_store = wgrad_store
             if is_recomputation:  # return the recomputed tensors
-                return (
+                return [
                     ctx,
                     inputmat,
                     ln_weight,
@@ -806,7 +806,7 @@ def _forward(
                     fc2_bias,
                     mu,
                     rsigma,
-                )
+                ]
 
         # we only get to this point if we are not recomputing for bwd, since that would have returned in the block above
         if return_layernorm_output: