@@ -1,7 +1,6 @@
 from __future__ import absolute_import
 import theano
 import theano.tensor as T
-import numpy as np
 
 from .utils.theano_utils import shared_zeros, shared_scalar, floatX
 from six.moves import zip
@@ -49,10 +48,13 @@ def get_config(self):
 
 class SGD(Optimizer):
 
-    def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False, *args, **kwargs):
+    def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False, *args,
+                 **kwargs):
         super(SGD, self).__init__(**kwargs)
         self.__dict__.update(locals())
         self.iterations = shared_scalar(0)
+        self.lr = shared_scalar(lr)
+        self.momentum = shared_scalar(momentum)
 
     def get_updates(self, params, constraints, loss):
         grads = self.get_gradients(loss, params)
@@ -84,6 +86,8 @@ class RMSprop(Optimizer):
     def __init__(self, lr=0.001, rho=0.9, epsilon=1e-6, *args, **kwargs):
         super(RMSprop, self).__init__(**kwargs)
         self.__dict__.update(locals())
+        self.lr = shared_scalar(lr)
+        self.rho = shared_scalar(rho)
 
     def get_updates(self, params, constraints, loss):
         grads = self.get_gradients(loss, params)
@@ -109,6 +113,7 @@ class Adagrad(Optimizer):
     def __init__(self, lr=0.01, epsilon=1e-6, *args, **kwargs):
         super(Adagrad, self).__init__(**kwargs)
         self.__dict__.update(locals())
+        self.lr = shared_scalar(lr)
 
     def get_updates(self, params, constraints, loss):
         grads = self.get_gradients(loss, params)
@@ -135,19 +140,22 @@ class Adadelta(Optimizer):
     def __init__(self, lr=1.0, rho=0.95, epsilon=1e-6, *args, **kwargs):
         super(Adadelta, self).__init__(**kwargs)
         self.__dict__.update(locals())
+        self.lr = shared_scalar(lr)
 
     def get_updates(self, params, constraints, loss):
         grads = self.get_gradients(loss, params)
         accumulators = [shared_zeros(p.get_value().shape) for p in params]
         delta_accumulators = [shared_zeros(p.get_value().shape) for p in params]
         self.updates = []
 
-        for p, g, a, d_a, c in zip(params, grads, accumulators, delta_accumulators, constraints):
+        for p, g, a, d_a, c in zip(params, grads, accumulators,
+                                   delta_accumulators, constraints):
             new_a = self.rho * a + (1 - self.rho) * g ** 2  # update accumulator
             self.updates.append((a, new_a))
 
             # use the new accumulator and the *old* delta_accumulator
-            update = g * T.sqrt(d_a + self.epsilon) / T.sqrt(new_a + self.epsilon)
+            update = g * T.sqrt(d_a + self.epsilon) / T.sqrt(new_a +
+                                                             self.epsilon)
 
             new_p = p - self.lr * update
             self.updates.append((p, c(new_p)))  # apply constraints
@@ -170,10 +178,12 @@ class Adam(Optimizer):
 
         Default parameters follow those provided in the original paper.
     '''
-    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, *args, **kwargs):
+    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, *args,
+                 **kwargs):
         super(Adam, self).__init__(**kwargs)
         self.__dict__.update(locals())
         self.iterations = shared_scalar(0)
+        self.lr = shared_scalar(lr)
 
     def get_updates(self, params, constraints, loss):
         grads = self.get_gradients(loss, params)
@@ -210,5 +220,8 @@ def get_config(self):
 adam = Adam
 
 from .utils.generic_utils import get_from_module
+
+
 def get(identifier, kwargs=None):
-    return get_from_module(identifier, globals(), 'optimizer', instantiate=True, kwargs=kwargs)
+    return get_from_module(identifier, globals(), 'optimizer', instantiate=True,
+                           kwargs=kwargs)
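
Note on the change (not part of the patch above): the edits store lr, momentum, and rho as Theano shared scalars instead of plain Python floats. A shared variable is referenced symbolically when the update graph is compiled, but its value can be swapped later with set_value(), so a training loop can anneal the learning rate without recompiling anything. Below is a minimal sketch of that mechanism using stock Theano only; theano.shared(np.cast[floatX](x)) stands in for the shared_scalar helper, and the toy parameter w and gradient placeholder g are hypothetical.

import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX

# Hyperparameter stored as a shared scalar, analogous to shared_scalar(0.01).
lr = theano.shared(np.cast[floatX](0.01), name='lr')

# Toy parameter and a placeholder for its gradient (illustration only).
w = theano.shared(np.zeros(3, dtype=floatX), name='w')
g = T.vector('g')

# The compiled update references lr symbolically, so it is built only once.
step = theano.function([g], [], updates=[(w, w - lr * g)])

step(np.ones(3, dtype=floatX))        # applies an update with lr = 0.01
lr.set_value(np.cast[floatX](0.001))  # change the learning rate in place
step(np.ones(3, dtype=floatX))        # the same function now uses lr = 0.001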