Commit 2d8739d

Minor optimizer consistency fixes.
1 parent d440e4b commit 2d8739d

4 files changed: +40 additions, -39 deletions

keras/backend/tensorflow_backend.py

Lines changed: 2 additions & 2 deletions
@@ -2153,7 +2153,7 @@ def set_value(x, value):
         value: Value to set the tensor to, as a Numpy array
             (of the same shape).
     """
-    value = np.asarray(value)
+    value = np.asarray(value, dtype=dtype(x))
     tf_dtype = _convert_string_dtype(x.dtype.name.split('_')[0])
     if hasattr(x, '_assign_placeholder'):
         assign_placeholder = x._assign_placeholder
@@ -2177,7 +2177,7 @@ def batch_set_value(tuples):
         assign_ops = []
         feed_dict = {}
         for x, value in tuples:
-            value = np.asarray(value)
+            value = np.asarray(value, dtype=dtype(x))
             tf_dtype = _convert_string_dtype(x.dtype.name.split('_')[0])
             if hasattr(x, '_assign_placeholder'):
                 assign_placeholder = x._assign_placeholder
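
With the added dtype argument, K.set_value and K.batch_set_value coerce the incoming Numpy array to the variable's own dtype instead of feeding it at whatever dtype Numpy inferred (typically float64). A minimal sketch of the effect, assuming the TensorFlow backend:

    import numpy as np
    from keras import backend as K

    # A float32 backend variable.
    v = K.variable(np.zeros((2, 2)), dtype='float32')

    # np.ones defaults to float64; the asarray(..., dtype=dtype(x)) call above
    # casts it to float32 before it is fed to the cached assign op.
    K.set_value(v, np.ones((2, 2)))
    print(K.get_value(v).dtype)  # float32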

keras/engine/training.py

Lines changed: 1 addition & 1 deletion
@@ -875,7 +875,7 @@ def compile(self, optimizer, loss, metrics=None, loss_weights=None,
         def append_metric(layer_index, metric_name, metric_tensor):
             """Helper function used in loop below."""
             if len(self.output_names) > 1:
-                metric_name = self.output_layers[layer_index].name + '_' + metric_name
+                metric_name = self.output_names[layer_index] + '_' + metric_name
             self.metrics_names.append(metric_name)
             self.metrics_tensors.append(metric_tensor)
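
Taking the prefix from output_names keeps metric names consistent with the per-output loss names, which compile builds from the same list. A small sketch with made-up layer names:

    from keras.layers import Input, Dense
    from keras.models import Model

    inp = Input(shape=(4,))
    out_a = Dense(1, name='out_a')(inp)
    out_b = Dense(1, name='out_b')(inp)

    model = Model(inp, [out_a, out_b])
    model.compile('sgd', 'mse', metrics=['mae'])

    # Losses are already reported as 'out_a_loss' / 'out_b_loss'; with this
    # change the metrics use the same output-name prefixes, e.g.
    # 'out_a_mean_absolute_error'.
    print(model.metrics_names)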

keras/layers/wrappers.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ class Wrapper(Layer):
     def __init__(self, layer, **kwargs):
         self.layer = layer
         # Tracks mapping of Wrapper inputs to inner layer inputs. Useful when
-        # the inner layer has update ops that depend on it's inputs (as opposed
+        # the inner layer has update ops that depend on its inputs (as opposed
         # to the inputs to the Wrapper layer).
         self._input_map = {}
         super(Wrapper, self).__init__(**kwargs)

keras/optimizers.py

Lines changed: 36 additions & 35 deletions
@@ -144,7 +144,7 @@ def __init__(self, lr=0.01, momentum=0., decay=0.,
                  nesterov=False, **kwargs):
         super(SGD, self).__init__(**kwargs)
         with K.name_scope(self.__class__.__name__):
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
             self.lr = K.variable(lr, name='lr')
             self.momentum = K.variable(momentum, name='momentum')
             self.decay = K.variable(decay, name='decay')
@@ -154,15 +154,14 @@ def __init__(self, lr=0.01, momentum=0., decay=0.,
     @interfaces.legacy_get_updates_support
     def get_updates(self, loss, params):
         grads = self.get_gradients(loss, params)
-        self.updates = []
+        self.updates = [K.update_add(self.iterations, 1)]

         lr = self.lr
         if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
-            self.updates.append(K.update_add(self.iterations, 1))
-
+            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
+                                                  K.dtype(self.decay))))
         # momentum
-        shapes = [K.get_variable_shape(p) for p in params]
+        shapes = [K.int_shape(p) for p in params]
         moments = [K.zeros(shape) for shape in shapes]
         self.weights = [self.iterations] + moments
         for p, g, m in zip(params, grads, moments):
@@ -217,21 +216,21 @@ def __init__(self, lr=0.001, rho=0.9, epsilon=1e-8, decay=0.,
             self.lr = K.variable(lr, name='lr')
             self.rho = K.variable(rho, name='rho')
             self.decay = K.variable(decay, name='decay')
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
             self.epsilon = epsilon
             self.initial_decay = decay

     @interfaces.legacy_get_updates_support
     def get_updates(self, loss, params):
         grads = self.get_gradients(loss, params)
-        accumulators = [K.zeros(K.get_variable_shape(p), dtype=K.dtype(p)) for p in params]
+        accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
         self.weights = accumulators
-        self.updates = []
+        self.updates = [K.update_add(self.iterations, 1)]

         lr = self.lr
         if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
-            self.updates.append(K.update_add(self.iterations, 1))
+            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
+                                                  K.dtype(self.decay))))

         for p, g, a in zip(params, grads, accumulators):
             # update accumulator
@@ -275,22 +274,22 @@ def __init__(self, lr=0.01, epsilon=1e-8, decay=0., **kwargs):
         with K.name_scope(self.__class__.__name__):
             self.lr = K.variable(lr, name='lr')
             self.decay = K.variable(decay, name='decay')
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
             self.epsilon = epsilon
             self.initial_decay = decay

     @interfaces.legacy_get_updates_support
     def get_updates(self, loss, params):
         grads = self.get_gradients(loss, params)
-        shapes = [K.get_variable_shape(p) for p in params]
+        shapes = [K.int_shape(p) for p in params]
         accumulators = [K.zeros(shape) for shape in shapes]
         self.weights = accumulators
-        self.updates = []
+        self.updates = [K.update_add(self.iterations, 1)]

         lr = self.lr
         if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
-            self.updates.append(K.update_add(self.iterations, 1))
+            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
+                                                  K.dtype(self.decay))))

         for p, g, a in zip(params, grads, accumulators):
             new_a = a + K.square(g)  # update accumulator
@@ -335,24 +334,24 @@ def __init__(self, lr=1.0, rho=0.95, epsilon=1e-8, decay=0.,
         with K.name_scope(self.__class__.__name__):
             self.lr = K.variable(lr, name='lr')
             self.decay = K.variable(decay, name='decay')
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
             self.rho = rho
             self.epsilon = epsilon
             self.initial_decay = decay

     @interfaces.legacy_get_updates_support
     def get_updates(self, loss, params):
         grads = self.get_gradients(loss, params)
-        shapes = [K.get_variable_shape(p) for p in params]
+        shapes = [K.int_shape(p) for p in params]
         accumulators = [K.zeros(shape) for shape in shapes]
         delta_accumulators = [K.zeros(shape) for shape in shapes]
         self.weights = accumulators + delta_accumulators
-        self.updates = []
+        self.updates = [K.update_add(self.iterations, 1)]

         lr = self.lr
         if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
-            self.updates.append(K.update_add(self.iterations, 1))
+            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
+                                                  K.dtype(self.decay))))

         for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
             # update accumulator
@@ -403,7 +402,7 @@ def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
                  epsilon=1e-8, decay=0., **kwargs):
         super(Adam, self).__init__(**kwargs)
         with K.name_scope(self.__class__.__name__):
-            self.iterations = K.variable(0, name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
             self.lr = K.variable(lr, name='lr')
             self.beta_1 = K.variable(beta_1, name='beta_1')
             self.beta_2 = K.variable(beta_2, name='beta_2')
@@ -418,14 +417,15 @@ def get_updates(self, loss, params):

         lr = self.lr
         if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
+            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
+                                                  K.dtype(self.decay))))

-        t = self.iterations + 1
+        t = K.cast(self.iterations, K.floatx()) + 1
         lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                      (1. - K.pow(self.beta_1, t)))

-        ms = [K.zeros(K.get_variable_shape(p), dtype=K.dtype(p)) for p in params]
-        vs = [K.zeros(K.get_variable_shape(p), dtype=K.dtype(p)) for p in params]
+        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
+        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
         self.weights = [self.iterations] + ms + vs

         for p, g, m, v in zip(params, grads, ms, vs):
@@ -474,7 +474,7 @@ def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
                  epsilon=1e-8, decay=0., **kwargs):
         super(Adamax, self).__init__(**kwargs)
         with K.name_scope(self.__class__.__name__):
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
             self.lr = K.variable(lr, name='lr')
             self.beta_1 = K.variable(beta_1, name='beta_1')
             self.beta_2 = K.variable(beta_2, name='beta_2')
@@ -489,12 +489,13 @@ def get_updates(self, loss, params):

         lr = self.lr
         if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
+            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
+                                                  K.dtype(self.decay))))

-        t = self.iterations + 1
+        t = K.cast(self.iterations, K.floatx()) + 1
         lr_t = lr / (1. - K.pow(self.beta_1, t))

-        shapes = [K.get_variable_shape(p) for p in params]
+        shapes = [K.int_shape(p) for p in params]
         # zero init of 1st moment
         ms = [K.zeros(shape) for shape in shapes]
         # zero init of exponentially weighted infinity norm
@@ -552,7 +553,7 @@ def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
                  epsilon=1e-8, schedule_decay=0.004, **kwargs):
         super(Nadam, self).__init__(**kwargs)
         with K.name_scope(self.__class__.__name__):
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
             self.m_schedule = K.variable(1., name='m_schedule')
             self.lr = K.variable(lr, name='lr')
             self.beta_1 = K.variable(beta_1, name='beta_1')
@@ -565,7 +566,7 @@ def get_updates(self, loss, params):
         grads = self.get_gradients(loss, params)
         self.updates = [K.update_add(self.iterations, 1)]

-        t = self.iterations + 1
+        t = K.cast(self.iterations, K.floatx()) + 1

         # Due to the recommendations in [2], i.e. warming momentum schedule
         momentum_cache_t = self.beta_1 * (1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
@@ -574,7 +575,7 @@ def get_updates(self, loss, params):
         m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
         self.updates.append((self.m_schedule, m_schedule_new))

-        shapes = [K.get_variable_shape(p) for p in params]
+        shapes = [K.int_shape(p) for p in params]
         ms = [K.zeros(shape) for shape in shapes]
         vs = [K.zeros(shape) for shape in shapes]

@@ -618,13 +619,13 @@ class TFOptimizer(Optimizer):

     def __init__(self, optimizer):
         self.optimizer = optimizer
-        self.updates = []
         with K.name_scope(self.__class__.__name__):
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')

     @interfaces.legacy_get_updates_support
     def get_updates(self, loss, params):
         grads = self.optimizer.compute_gradients(loss, params)
+        self.updates = [K.update_add(self.iterations, 1)]
         opt_update = self.optimizer.apply_gradients(
             grads, global_step=self.iterations)
         self.updates.append(opt_update)
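
Taken together, these hunks give every built-in optimizer the same bookkeeping: iterations is an int64 counter, incrementing it is the first entry in self.updates, it is cast to a float dtype before entering the decay factor or bias-correction terms, and slot variables are shaped with K.int_shape. A condensed, illustrative sketch of that shared pattern (not an actual keras.optimizers.Optimizer subclass, and with a hypothetical signature that takes gradients directly):

    from keras import backend as K

    class PatternSketch(object):
        """Illustration of the post-commit optimizer bookkeeping only."""

        def __init__(self, lr=0.01, decay=0.):
            with K.name_scope(self.__class__.__name__):
                # Integer step counter, as in the hunks above.
                self.iterations = K.variable(0, dtype='int64', name='iterations')
                self.lr = K.variable(lr, name='lr')
                self.decay = K.variable(decay, name='decay')
            self.initial_decay = decay

        def get_updates(self, params, grads):
            # The counter increment always comes first in the update list.
            self.updates = [K.update_add(self.iterations, 1)]

            lr = self.lr
            if self.initial_decay > 0:
                # Cast the int64 counter before mixing it with float tensors.
                lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

            # Slot variables are shaped with K.int_shape rather than the
            # older K.get_variable_shape.
            moments = [K.zeros(K.int_shape(p)) for p in params]
            self.weights = [self.iterations] + moments

            for p, g, m in zip(params, grads, moments):
                v = -lr * g  # plain gradient step, just to close the sketch
                self.updates.append(K.update(m, v))
                self.updates.append(K.update_add(p, v))
            return self.updates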
