@@ -144,7 +144,7 @@ def __init__(self, lr=0.01, momentum=0., decay=0.,
                  nesterov=False, **kwargs):
         super(SGD, self).__init__(**kwargs)
         with K.name_scope(self.__class__.__name__):
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
             self.lr = K.variable(lr, name='lr')
             self.momentum = K.variable(momentum, name='momentum')
             self.decay = K.variable(decay, name='decay')
@@ -154,15 +154,14 @@ def __init__(self, lr=0.01, momentum=0., decay=0.,
     @interfaces.legacy_get_updates_support
     def get_updates(self, loss, params):
         grads = self.get_gradients(loss, params)
-        self.updates = []
+        self.updates = [K.update_add(self.iterations, 1)]
 
         lr = self.lr
         if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
-            self.updates.append(K.update_add(self.iterations, 1))
-
+            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
+                                                  K.dtype(self.decay))))
         # momentum
-        shapes = [K.get_variable_shape(p) for p in params]
+        shapes = [K.int_shape(p) for p in params]
         moments = [K.zeros(shape) for shape in shapes]
         self.weights = [self.iterations] + moments
         for p, g, m in zip(params, grads, moments):
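A plain-Python sketch (not the Keras backend) of the time-based decay this hunk computes; the int64 iterations counter is cast to the decay's float dtype before it enters the expression:

    def decayed_lr(base_lr, decay, iterations):
        # lr after `iterations` updates, matching lr *= 1. / (1. + decay * t)
        return base_lr * (1. / (1. + decay * float(iterations)))

    print(decayed_lr(0.01, 1e-4, 10000))  # 0.005: the lr has halved after 10k updates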
@@ -217,21 +216,21 @@ def __init__(self, lr=0.001, rho=0.9, epsilon=1e-8, decay=0.,
             self.lr = K.variable(lr, name='lr')
             self.rho = K.variable(rho, name='rho')
             self.decay = K.variable(decay, name='decay')
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
         self.epsilon = epsilon
         self.initial_decay = decay
 
     @interfaces.legacy_get_updates_support
     def get_updates(self, loss, params):
         grads = self.get_gradients(loss, params)
-        accumulators = [K.zeros(K.get_variable_shape(p), dtype=K.dtype(p)) for p in params]
+        accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
         self.weights = accumulators
-        self.updates = []
+        self.updates = [K.update_add(self.iterations, 1)]
 
         lr = self.lr
         if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
-            self.updates.append(K.update_add(self.iterations, 1))
+            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
+                                                  K.dtype(self.decay))))
 
         for p, g, a in zip(params, grads, accumulators):
             # update accumulator
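Moving the increment into the initial self.updates list means the counter is stepped on every call, not only when decay is used. A hedged sketch of the counter's behaviour, assuming the Keras 2 backend API on TensorFlow:

    from keras import backend as K

    iterations = K.variable(0, dtype='int64', name='iterations')
    # one training step = one execution of the updates list
    step = K.function([], [iterations], updates=[K.update_add(iterations, 1)])
    step([]); step([]); step([])
    print(K.get_value(iterations))  # 3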
@@ -275,22 +274,22 @@ def __init__(self, lr=0.01, epsilon=1e-8, decay=0., **kwargs):
         with K.name_scope(self.__class__.__name__):
             self.lr = K.variable(lr, name='lr')
             self.decay = K.variable(decay, name='decay')
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
         self.epsilon = epsilon
         self.initial_decay = decay
 
     @interfaces.legacy_get_updates_support
     def get_updates(self, loss, params):
         grads = self.get_gradients(loss, params)
-        shapes = [K.get_variable_shape(p) for p in params]
+        shapes = [K.int_shape(p) for p in params]
         accumulators = [K.zeros(shape) for shape in shapes]
         self.weights = accumulators
-        self.updates = []
+        self.updates = [K.update_add(self.iterations, 1)]
 
         lr = self.lr
         if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
-            self.updates.append(K.update_add(self.iterations, 1))
+            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
+                                                  K.dtype(self.decay))))
 
         for p, g, a in zip(params, grads, accumulators):
             new_a = a + K.square(g)  # update accumulator
@@ -335,24 +334,24 @@ def __init__(self, lr=1.0, rho=0.95, epsilon=1e-8, decay=0.,
         with K.name_scope(self.__class__.__name__):
             self.lr = K.variable(lr, name='lr')
             self.decay = K.variable(decay, name='decay')
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
         self.rho = rho
         self.epsilon = epsilon
         self.initial_decay = decay
 
     @interfaces.legacy_get_updates_support
     def get_updates(self, loss, params):
         grads = self.get_gradients(loss, params)
-        shapes = [K.get_variable_shape(p) for p in params]
+        shapes = [K.int_shape(p) for p in params]
         accumulators = [K.zeros(shape) for shape in shapes]
         delta_accumulators = [K.zeros(shape) for shape in shapes]
         self.weights = accumulators + delta_accumulators
-        self.updates = []
+        self.updates = [K.update_add(self.iterations, 1)]
 
         lr = self.lr
         if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
-            self.updates.append(K.update_add(self.iterations, 1))
+            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
+                                                  K.dtype(self.decay))))
 
         for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
             # update accumulator
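A hedged sketch of the K.get_variable_shape -> K.int_shape swap used throughout these hunks (TensorFlow backend assumed): int_shape returns the static shape as a plain tuple of ints, which is what K.zeros needs to build the accumulators.

    from keras import backend as K
    import numpy as np

    p = K.variable(np.zeros((2, 3)))
    print(K.int_shape(p))          # (2, 3)
    acc = K.zeros(K.int_shape(p))  # zero-initialized accumulator of the same shape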
@@ -403,7 +402,7 @@ def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
                  epsilon=1e-8, decay=0., **kwargs):
         super(Adam, self).__init__(**kwargs)
         with K.name_scope(self.__class__.__name__):
-            self.iterations = K.variable(0, name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
             self.lr = K.variable(lr, name='lr')
             self.beta_1 = K.variable(beta_1, name='beta_1')
             self.beta_2 = K.variable(beta_2, name='beta_2')
@@ -418,14 +417,15 @@ def get_updates(self, loss, params):
 
         lr = self.lr
         if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
+            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
+                                                  K.dtype(self.decay))))
 
-        t = self.iterations + 1
+        t = K.cast(self.iterations, K.floatx()) + 1
         lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                      (1. - K.pow(self.beta_1, t)))
 
-        ms = [K.zeros(K.get_variable_shape(p), dtype=K.dtype(p)) for p in params]
-        vs = [K.zeros(K.get_variable_shape(p), dtype=K.dtype(p)) for p in params]
+        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
+        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
         self.weights = [self.iterations] + ms + vs
 
         for p, g, m, v in zip(params, grads, ms, vs):
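A plain-Python sketch of the bias-corrected step size computed above, with the int64 iterations counter cast to float exactly as K.cast(self.iterations, K.floatx()) does:

    import math

    def adam_lr_t(lr, beta_1, beta_2, iterations):
        t = float(iterations) + 1
        return lr * (math.sqrt(1. - beta_2 ** t) / (1. - beta_1 ** t))

    print(adam_lr_t(0.001, 0.9, 0.999, 0))  # first step: ~0.000316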
@@ -474,7 +474,7 @@ def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
                  epsilon=1e-8, decay=0., **kwargs):
         super(Adamax, self).__init__(**kwargs)
         with K.name_scope(self.__class__.__name__):
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
             self.lr = K.variable(lr, name='lr')
             self.beta_1 = K.variable(beta_1, name='beta_1')
             self.beta_2 = K.variable(beta_2, name='beta_2')
@@ -489,12 +489,13 @@ def get_updates(self, loss, params):
 
         lr = self.lr
         if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
+            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
+                                                  K.dtype(self.decay))))
 
-        t = self.iterations + 1
+        t = K.cast(self.iterations, K.floatx()) + 1
         lr_t = lr / (1. - K.pow(self.beta_1, t))
 
-        shapes = [K.get_variable_shape(p) for p in params]
+        shapes = [K.int_shape(p) for p in params]
         # zero init of 1st moment
         ms = [K.zeros(shape) for shape in shapes]
         # zero init of exponentially weighted infinity norm
@@ -552,7 +553,7 @@ def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
                  epsilon=1e-8, schedule_decay=0.004, **kwargs):
         super(Nadam, self).__init__(**kwargs)
         with K.name_scope(self.__class__.__name__):
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
             self.m_schedule = K.variable(1., name='m_schedule')
             self.lr = K.variable(lr, name='lr')
             self.beta_1 = K.variable(beta_1, name='beta_1')
@@ -565,7 +566,7 @@ def get_updates(self, loss, params):
         grads = self.get_gradients(loss, params)
         self.updates = [K.update_add(self.iterations, 1)]
 
-        t = self.iterations + 1
+        t = K.cast(self.iterations, K.floatx()) + 1
 
         # Due to the recommendations in [2], i.e. warming momentum schedule
         momentum_cache_t = self.beta_1 * (1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
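A plain-Python sketch of Nadam's warming momentum schedule shown above: with the step count t now derived from the int64 counter via K.cast(..., K.floatx()), the effective momentum ramps from roughly 0.5*beta_1 toward beta_1.

    def momentum_cache(beta_1, t, schedule_decay=0.004):
        return beta_1 * (1. - 0.5 * (0.96 ** (t * schedule_decay)))

    print(momentum_cache(0.9, 1))    # ~0.4501 on the first step
    print(momentum_cache(0.9, 1e5))  # ~0.9 once the schedule has warmed up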
@@ -574,7 +575,7 @@ def get_updates(self, loss, params):
         m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
         self.updates.append((self.m_schedule, m_schedule_new))
 
-        shapes = [K.get_variable_shape(p) for p in params]
+        shapes = [K.int_shape(p) for p in params]
         ms = [K.zeros(shape) for shape in shapes]
         vs = [K.zeros(shape) for shape in shapes]
 
@@ -618,13 +619,13 @@ class TFOptimizer(Optimizer):
 
     def __init__(self, optimizer):
         self.optimizer = optimizer
-        self.updates = []
         with K.name_scope(self.__class__.__name__):
-            self.iterations = K.variable(0., name='iterations')
+            self.iterations = K.variable(0, dtype='int64', name='iterations')
 
     @interfaces.legacy_get_updates_support
     def get_updates(self, loss, params):
         grads = self.optimizer.compute_gradients(loss, params)
+        self.updates = [K.update_add(self.iterations, 1)]
         opt_update = self.optimizer.apply_gradients(
             grads, global_step=self.iterations)
         self.updates.append(opt_update)
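A hedged usage sketch, assuming the TensorFlow 1.x backend: TFOptimizer wraps a raw tf.train optimizer so it can be passed to model.compile(), and the int64 iterations variable above doubles as the global_step handed to apply_gradients.

    import tensorflow as tf
    from keras.optimizers import TFOptimizer

    opt = TFOptimizer(tf.train.GradientDescentOptimizer(learning_rate=0.01))
    # model.compile(optimizer=opt, loss='mse')  # `model` is any Keras model (assumed)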