Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
reduce mem
  • Loading branch information
strint committed Sep 14, 2022
commit a8e3d43a619a643919bf08abea4d6da890af83c4
4 changes: 2 additions & 2 deletions configs/t5_large_pretrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

# T5-large model config
model.cfg.num_attention_heads = 12
-model.cfg.hidden_size = 384
+model.cfg.hidden_size = 12
model.cfg.hidden_layers = 6
model.cfg.scale_mask_softmax_fusion = False
model.cfg.bias_dropout_fusion = False
Expand All @@ -33,7 +33,7 @@
train.dist.pipeline_num_layers = 2 * model.cfg.hidden_layers


-train.train_micro_batch_size = 16
+train.train_micro_batch_size = 1
train.amp.enabled = True

train.evaluation.evaluator = LazyCall(PPLEvaluator)()
Expand Down