
Commit 574c981

Author: Lukasz Kaiser

Final tweaks for lower batch size.

1 parent 91ff146 · commit 574c981

2 files changed: 4 additions & 4 deletions


neural_gpu/neural_gpu.py

Lines changed: 1 addition & 1 deletion
@@ -151,7 +151,7 @@ def __init__(self, nmaps, vec_size, niclass, noclass, dropout, rx_step,
         tf.constant(0, dtype=tf.int32, shape=[1]),
         tf.zeros([1, vec_size]))
 
-    adam = tf.train.AdamOptimizer(0.01*self.lr, epsilon=1e-4)
+    adam = tf.train.AdamOptimizer(self.lr, epsilon=1e-4)
 
     # Main graph creation loop, for every bin in data_utils.
     self.steps = []
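The 0.01 factor removed from the optimizer is folded into the trainer's new lr flag default below, so the effective initial rate is unchanged and Adam now consumes the learning-rate variable directly. A minimal TF 1.x sketch of the resulting pattern, assuming (as this codebase appears to do) that self.lr is a non-trainable variable holding the current, possibly decayed, rate:

    import tensorflow as tf  # TF 1.x API, as used by this repository

    lr = tf.Variable(0.003, trainable=False, name="lr")  # stand-in for self.lr
    adam = tf.train.AdamOptimizer(lr, epsilon=1e-4)      # rate applied unscaled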

neural_gpu/neural_gpu_trainer.py

Lines changed: 3 additions & 3 deletions
@@ -31,7 +31,7 @@
 import data_utils as data
 import neural_gpu
 
-tf.app.flags.DEFINE_float("lr", 0.3, "Learning rate.")
+tf.app.flags.DEFINE_float("lr", 0.003, "Learning rate.")
 tf.app.flags.DEFINE_float("init_weight", 1.0, "Initial weights deviation.")
 tf.app.flags.DEFINE_float("max_grad_norm", 0.05, "Clip gradients to this norm.")
 tf.app.flags.DEFINE_float("cutoff", 1.2, "Cutoff at the gates.")
@@ -215,7 +215,7 @@ def train():
       start_time = time.time()
       inp, target = data.get_batch(l, batch_size, True, task)
       noise_param = math.sqrt(math.pow(global_step, -0.55) *
-                              (20 * prev_seq_err)) * FLAGS.grad_noise_scale
+                              prev_seq_err) * FLAGS.grad_noise_scale
       loss, res, gnorm, _ = model.step(sess, inp, target, True, noise_param)
       step_time += time.time() - start_time
       acc_grad_norm += float(gnorm)
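Dropping the 20x multiplier shrinks the injected gradient noise by a factor of sqrt(20) ≈ 4.5 at any given step. A self-contained sketch of the schedule as it now stands; the function name and sample inputs are illustrative, not from the repository:

    import math

    def noise_scale(global_step, prev_seq_err, grad_noise_scale=1.0):
      # Annealed noise magnitude matching the updated line above:
      # decays polynomially in the step and with the recent sequence error.
      return math.sqrt(math.pow(global_step, -0.55) *
                       prev_seq_err) * grad_noise_scale

    print(noise_scale(100, 0.5))    # ~0.199
    print(noise_scale(10000, 0.5))  # ~0.056
    print(noise_scale(100, 0.0))    # 0.0, no noise once prev_seq_err is zero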
@@ -234,7 +234,7 @@ def train():
       acc_loss /= step_count
       step_time /= FLAGS.steps_per_checkpoint
       acc_seq_err = float(acc_seq_err) / (step_count * batch_size)
-      prev_seq_err = acc_seq_err
+      prev_seq_err = max(0.0, acc_seq_err - 0.02)  # No noise at error < 2%.
       acc_errors = float(acc_errors) / acc_total if acc_total > 0 else 1.0
       msg1 = "step %d step-time %.2f" % (global_step, step_time)
       msg2 = "lr %.8f pull %.3f" % (learning_rate, pull)
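This line closes the loop on the noise schedule: once the per-checkpoint sequence error drops below 2%, prev_seq_err becomes zero and the noise_param computed above vanishes. A tiny illustration with hypothetical error values:

    def thresholded_seq_err(acc_seq_err):
      # Mirrors the updated line: subtract the 2% floor, clamp at zero.
      return max(0.0, acc_seq_err - 0.02)

    print(thresholded_seq_err(0.10))   # 0.08 -> some noise still injected
    print(thresholded_seq_err(0.015))  # 0.0  -> noise disabled below 2% error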
