1414
1515from theano .tensor .shared_randomstreams import RandomStreams
1616
17- from theano .sandbox .scan import scan
18-
19-
2017class RBM ():
2118 """Restricted Boltzmann Machine (RBM)
2219 """
@@ -267,8 +264,7 @@ class RBM_option2(object):
267264 *** WRITE THE ENERGY FUNCTION USE SAME LETTERS AS VARIABLE NAMES IN CODE
268265 """
269266
270- @classmethod
271- def new (cls , input = None , n_visible = 784 , n_hidden = 500 ,
267+ def __init__ (self , input = None , n_visible = 784 , n_hidden = 500 ,
272268 W = None , hbias = None , vbias = None ,
273269 numpy_rng = None ):
274270 """
@@ -320,12 +316,7 @@ def new(cls, input=None, n_visible=784, n_hidden=500,
320316 # initialize input layer for standalone RBM or layer0 of DBN
321317 input = T .dmatrix ('input' )
322318
323- return cls (input , W , hbias , vbias , params )
324-
325- def __init__ (self , input , W , hbias , vbias , params ):
326-
327- # setup theano random number generator
328- self .visible = self .input = input
319+ self .input = input
329320 self .W = W
330321 self .hbias = hbias
331322 self .vbias = vbias
@@ -334,41 +325,62 @@ def __init__(self, input, W, hbias, vbias, params):
334325 self .hidden_mean = T .nnet .sigmoid (T .dot (input , W )+ hbias )
335326 self .hidden_sample = Trng .binomial (self .hidden_mean .shape , 1 , self .hidden_mean )
336327
def gibbs_k(self, v_sample, k):
    """Run ``k`` steps of alternating Gibbs sampling from ``v_sample``.

    The chain is iterated with Theano's ``scan`` op; see
    http://deeplearning.net/software/theano/library/scan.html for a
    description of scan.

    Parameters
    ----------
    v_sample : Theano matrix expression
        Starting state of the visible units (one row per example).
    k : int or Theano scalar
        Number of full Gibbs steps (visible -> hidden -> visible).

    Returns
    -------
    (v_mean, v_sample) : pair of Theano expressions
        Mean-field activation and binary sample of the visible units
        after the k-th step.
    """

    def gibbs_1(v0_sample, t):
        """One Gibbs step: sample hiddens given visibles, then
        visibles given hiddens.

        ``t`` is the previous value of the second output, supplied by
        scan because of ``outputs_taps`` below; it is not used.
        """
        # activation of the hidden units given a visible sample
        h0_mean = T.nnet.sigmoid(T.dot(v0_sample, self.W) + self.hbias)
        # binary sample of the hiddens given their activation
        h0_sample = self.theano_rng.binomial(h0_mean.shape, 1, h0_mean)
        # activation of the visibles given the hidden sample
        v1_mean = T.nnet.sigmoid(T.dot(h0_sample, self.W.T) + self.vbias)
        # binary sample of the visibles given their activation
        v1_act = self.theano_rng.binomial(v1_mean.shape, 1, v1_mean)
        return [v1_act, v1_mean]

    # Scan infers the shape of each recurrent output from its initial
    # state.  ``v_mean`` needs no meaningful initial state, so a dummy
    # of the right shape is provided purely for shape inference.
    v_mean = T.zeros_like(v_sample)

    # ``outputs_taps`` maps each output index to the list of past-step
    # offsets its value is fed back at.  Both outputs are tapped at -1
    # here, which is why ``gibbs_1`` takes two arguments (the second,
    # ``t``, is unused).
    # NOTE(review): ``initial_states``/``outputs_taps`` belong to an
    # old experimental scan interface; current ``theano.scan`` uses
    # ``outputs_info`` and returns ``(outputs, updates)`` — confirm
    # against the installed Theano version.
    outputs_taps = {0: [-1], 1: [-1]}

    v_samples, v_means = theano.scan(fn=gibbs_1,
                                     sequences=[],
                                     initial_states=[v_sample, v_mean],
                                     non_sequences=[],
                                     outputs_taps=outputs_taps,
                                     n_steps=k)
    # return (mean, sample) of the final chain state
    return v_means[-1], v_samples[-1]
365377
def free_energy(self, v_sample):
    """Return the (scalar) free-energy term used as the CD cost for
    visible configuration ``v_sample``.

    NOTE(review): the textbook RBM free energy is
    ``-sum(softplus(v.W + hbias)) - v . vbias``; this code computes
    ``-sum(log(1.0001 - sigmoid(v.W + hbias))) - sum(v . vbias)``
    instead (with 1.0001 guarding log(0)).  Behavior is kept as-is —
    confirm the intended formula.
    """
    # mean-field hidden activations given the visible sample
    h_mean = T.nnet.sigmoid(T.dot(v_sample, self.W) + self.hbias)
    # TODO: make sure log(sigmoid) is optimized to something stable!
    return -T.sum(T.log(1.0001 - h_mean)) - T.sum(T.dot(v_sample, self.vbias))
370382
def cd(self, visible=None, persistent=None, steps=1):
    """Contrastive divergence.

    Returns a tuple ``(cost, chain_end_sample, *gradients)`` where the
    gradients are with respect to ``self.params``.

    Parameters
    ----------
    visible : Theano matrix expression or None
        Positive-phase data; defaults to ``self.input``.
    persistent : shared variable or None
        If given, the negative-phase chain starts from it (PCD);
        otherwise the chain starts from ``visible`` (plain CD).
    steps : int
        Number of Gibbs steps for the negative phase (k in CD-k).

    Usage
    -----
    CD aka CD1 - cd()
    CD-10      - cd(steps=10)
    PCD        - cd(persistent=shared(numpy.asarray(initializer)))
    PCD-k      - cd(persistent=shared(numpy.asarray(initializer)),
                    steps=10)

    Raises
    ------
    TypeError
        If ``visible`` is None and ``self.input`` is also None.
    """
    if visible is None:
        visible = self.input

    if visible is None:
        raise TypeError('visible argument is required when self.input is None')

    # negative-phase chain starts from the data (CD) or from the
    # persistent state (PCD)
    if persistent is None:
        chain_start = visible
    else:
        chain_start = persistent
    chain_end_mean, chain_end_sample = self.gibbs_k(chain_start, steps)

    cost = self.free_energy(visible) - self.free_energy(chain_end_sample)

    # Compute the gradient of the cost w.r.t. the parameters.
    # ``consider_constant`` stops the gradient from propagating back
    # through the Gibbs chain.
    gparams = T.grad(cost, self.params, consider_constant=[chain_end_sample])

    return (cost, chain_end_sample,) + tuple(gparams)
402419
403- return (cost , chain_end_sample ,) + tuple (T .grad (cost , [self .W , self .hbias , self .vbias ]))
404-
405- def cd_updates (self , lr , visible = None , persistent = None , step = None ):
420+ def cd_updates (self , lr , visible = None , persistent = None , steps = 1 ):
406421 """
407422 Return the learning updates for the RBM parameters that are shared variables.
408423
@@ -417,7 +432,7 @@ def cd_updates(self, lr, visible=None, persistent=None, step = None):
417432
418433 """
419434
420- cost , chain_end , gW , ghbias , gvbias = self .cd (visible , persistent , step )
435+ cost , chain_end , gW , ghbias , gvbias = self .cd (visible , persistent , steps )
421436
422437 updates = {}
423438 if self .W in self .params :
@@ -463,14 +478,13 @@ def shared_dataset(data_xy):
463478
464479 print '... making model'
465480 # construct the RBM class
466- rbm = RBM_option2 . new (input = x , n_visible = 28 * 28 , n_hidden = 500 , numpy_rng =
481+ rbm = RBM_option2 (input = x , n_visible = 28 * 28 , n_hidden = 500 , numpy_rng =
467482 numpy .random .RandomState (234234 ))
468- step = rbm .gibbs_k (10 )
469- cost = rbm .cd (step = step )[0 ]
483+ cost = rbm .cd (steps = 10 )[0 ]
470484
471485 print '... compiling train function'
472- train_rbm = theano .function ([index ], rbm .cd (step = step )[0 ],
473- updates = rbm .cd_updates (learning_rate , step = step ),
486+ train_rbm = theano .function ([index ], rbm .cd (steps = 10 )[0 ],
487+ updates = rbm .cd_updates (learning_rate , steps = 10 ),
474488 givens = {
475489 x : train_set_x [index * batch_size :(index + 1 )* batch_size ],
476490 y : train_set_y [index * batch_size :(index + 1 )* batch_size ]}
0 commit comments