Commit cdbbdce

Make lr and momentum shared_scalars
With lr and momentum being shared scalars, we can change their values without needing to recompile the model. This PR also includes a Callback called LrSetter that takes a dict of epoch/learning-rate pairs and sets the learning rate to the given value at the beginning of the associated epoch.
1 parent 103a3da commit cdbbdce
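For context, a minimal Theano sketch (not part of the commit) of why a shared scalar avoids recompilation: the compiled function closes over the shared variable, so set_value changes the value used by subsequent calls without rebuilding the graph.

import numpy as np
import theano
import theano.tensor as T

# A learning rate stored as a Theano shared scalar. The compiled function
# reads its current value on every call, so no recompilation is needed.
lr = theano.shared(np.cast[theano.config.floatX](0.1), name='lr')
grad = T.vector('grad')
step = theano.function([grad], -lr * grad)

g = np.ones(3, dtype=theano.config.floatX)
print(step(g))                                      # uses lr = 0.1
lr.set_value(np.cast[theano.config.floatX](0.01))   # change in place, no recompile
print(step(g))                                      # now uses lr = 0.01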

2 files changed (+37 lines, -9 lines)

keras/callbacks.py

Lines changed: 18 additions & 3 deletions
@@ -1,11 +1,11 @@
 from __future__ import absolute_import
 from __future__ import print_function

-import theano
-import theano.tensor as T
 import numpy as np

-import time, json, warnings
+import time
+import json
+import warnings
 from collections import deque

 from .utils.generic_utils import Progbar
@@ -262,3 +262,18 @@ def on_epoch_end(self, epoch, logs={}):
             r = requests.post(self.root + '/publish/epoch/end/', {'data': json.dumps(send)})
         except:
             print('Warning: could not reach RemoteMonitor root server at ' + str(self.root))
+
+
+class LrSetter(Callback):
+    '''LrSetter
+    epoch_lr is a dict with epoch x learning_rate pairs;
+    every time you reach an epoch in that dict, the learning rate is set to
+    that value
+    '''
+    def __init__(self, epoch_lr):
+        super(LrSetter, self).__init__()
+        self.epoch_lr = epoch_lr
+
+    def on_epoch_end(self, epoch, logs={}):
+        if str(epoch) in self.epoch_lr:
+            self.model.lr.set_value(self.epoch_lr[str(epoch)])
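A hypothetical usage sketch of the new callback (the layer setup, placeholder data, and fit arguments are illustrative and assume the Sequential API of this Keras version). Note that the lookup is against str(epoch) inside on_epoch_end, so the dict keys are strings.

import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD
from keras.callbacks import LrSetter

# Placeholder data: 256 samples, 20 features, 10 one-hot classes.
X = np.random.random((256, 20)).astype('float32')
y = np.zeros((256, 10), dtype='float32')
y[np.arange(256), np.random.randint(0, 10, 256)] = 1.

model = Sequential()
model.add(Dense(20, 10))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=0.1, momentum=0.9))

# Drop the learning rate after epochs 10 and 20; keys are strings
# because LrSetter checks str(epoch) against the dict.
schedule = LrSetter(epoch_lr={'10': 0.01, '20': 0.001})
model.fit(X, y, nb_epoch=30, callbacks=[schedule])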

keras/optimizers.py

Lines changed: 19 additions & 6 deletions
@@ -1,7 +1,6 @@
 from __future__ import absolute_import
 import theano
 import theano.tensor as T
-import numpy as np

 from .utils.theano_utils import shared_zeros, shared_scalar, floatX
 from six.moves import zip
@@ -49,10 +48,13 @@ def get_config(self):

 class SGD(Optimizer):

-    def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False, *args, **kwargs):
+    def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False, *args,
+                 **kwargs):
         super(SGD, self).__init__(**kwargs)
         self.__dict__.update(locals())
         self.iterations = shared_scalar(0)
+        self.lr = shared_scalar(lr)
+        self.momentum = shared_scalar(momentum)

     def get_updates(self, params, constraints, loss):
         grads = self.get_gradients(loss, params)
@@ -84,6 +86,8 @@ class RMSprop(Optimizer):
     def __init__(self, lr=0.001, rho=0.9, epsilon=1e-6, *args, **kwargs):
         super(RMSprop, self).__init__(**kwargs)
         self.__dict__.update(locals())
+        self.lr = shared_scalar(lr)
+        self.rho = shared_scalar(rho)

     def get_updates(self, params, constraints, loss):
         grads = self.get_gradients(loss, params)
@@ -109,6 +113,7 @@ class Adagrad(Optimizer):
     def __init__(self, lr=0.01, epsilon=1e-6, *args, **kwargs):
         super(Adagrad, self).__init__(**kwargs)
         self.__dict__.update(locals())
+        self.lr = shared_scalar(lr)

     def get_updates(self, params, constraints, loss):
         grads = self.get_gradients(loss, params)
@@ -135,19 +140,22 @@ class Adadelta(Optimizer):
     def __init__(self, lr=1.0, rho=0.95, epsilon=1e-6, *args, **kwargs):
         super(Adadelta, self).__init__(**kwargs)
         self.__dict__.update(locals())
+        self.lr = shared_scalar(lr)

     def get_updates(self, params, constraints, loss):
         grads = self.get_gradients(loss, params)
         accumulators = [shared_zeros(p.get_value().shape) for p in params]
         delta_accumulators = [shared_zeros(p.get_value().shape) for p in params]
         self.updates = []

-        for p, g, a, d_a, c in zip(params, grads, accumulators, delta_accumulators, constraints):
+        for p, g, a, d_a, c in zip(params, grads, accumulators,
+                                   delta_accumulators, constraints):
             new_a = self.rho * a + (1 - self.rho) * g ** 2 # update accumulator
             self.updates.append((a, new_a))

             # use the new accumulator and the *old* delta_accumulator
-            update = g * T.sqrt(d_a + self.epsilon) / T.sqrt(new_a + self.epsilon)
+            update = g * T.sqrt(d_a + self.epsilon) / T.sqrt(new_a +
+                                                             self.epsilon)

             new_p = p - self.lr * update
             self.updates.append((p, c(new_p)))  # apply constraints
@@ -170,10 +178,12 @@ class Adam(Optimizer):

     Default parameters follow those provided in the original paper.
     '''
-    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, *args, **kwargs):
+    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, *args,
+                 **kwargs):
         super(Adam, self).__init__(**kwargs)
         self.__dict__.update(locals())
         self.iterations = shared_scalar(0)
+        self.lr = shared_scalar(lr)

     def get_updates(self, params, constraints, loss):
         grads = self.get_gradients(loss, params)
@@ -210,5 +220,8 @@ def get_config(self):
 adam = Adam

 from .utils.generic_utils import get_from_module
+
+
 def get(identifier, kwargs=None):
-    return get_from_module(identifier, globals(), 'optimizer', instantiate=True, kwargs=kwargs)
+    return get_from_module(identifier, globals(), 'optimizer', instantiate=True,
+                           kwargs=kwargs)
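Because lr (and, for SGD, momentum) is now a Theano shared scalar on the optimizer instance, it can also be adjusted directly, for example between calls to fit. A brief sketch (the optimizer instance and values are illustrative):

from keras.optimizers import SGD

sgd = SGD(lr=0.1, momentum=0.9)
# ... compile a model with this optimizer and train for a while ...
print(sgd.lr.get_value())     # roughly 0.1 (stored as floatX)
sgd.lr.set_value(0.01)        # picked up by the next update, no recompile
sgd.momentum.set_value(0.95)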
