
Commit d7ea6c2

Merge git://github.com/fidlej/DeepLearningTutorials
2 parents: e371d52 + 8469cc9

4 files changed

Lines changed: 26 additions & 54 deletions


code/mlp.py

Lines changed: 6 additions & 16 deletions
@@ -69,22 +69,12 @@ def __init__(self, rng, input, n_in, n_out, activation = T.tanh):
         # For example, results presented in [Xavier10] suggest that you
         # should use 4 times larger initial weights for sigmoid
         # compared to tanh
-        if activation == theano.tensor.tanh:
-            W_values = numpy.asarray( rng.uniform(
-                low = - numpy.sqrt(6./(n_in+n_out)),
-                high = numpy.sqrt(6./(n_in+n_out)),
-                size = (n_in, n_out)), dtype = theano.config.floatX)
-        elif activation == theano.tensor.nnet.sigmoid:
-            W_values = numpy.asarray( 4*rng.uniform(
-                low = - numpy.sqrt(6./(n_in+n_out)),
-                high = numpy.sqrt(6./(n_in+n_out)),
-                size = (n_in, n_out)), dtype = theano.config.floatX)
-        else:
-            # how should we initialize the weights for your activation function ?
-            W_values = numpy.asarray( rng.uniform(
-                low = - numpy.sqrt(6./(n_in+n_out)),
-                high = numpy.sqrt(6./(n_in+n_out)),
-                size = (n_in, n_out)), dtype = theano.config.floatX)
+        W_values = numpy.asarray( rng.uniform(
+            low = - numpy.sqrt(6./(n_in+n_out)),
+            high = numpy.sqrt(6./(n_in+n_out)),
+            size = (n_in, n_out)), dtype = theano.config.floatX)
+        if activation == theano.tensor.nnet.sigmoid:
+            W_values *= 4

         self.W = theano.shared(value = W_values, name ='W')
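This change collapses three near-identical branches (tanh, sigmoid, other) into a single uniform sample followed by a conditional rescale, so unknown activations now get the tanh-style default instead of a questioning comment. A minimal NumPy-only sketch of the resulting initialization; the helper name xavier_init and the float32 dtype are illustrative assumptions, not part of the commit:

import numpy

def xavier_init(rng, n_in, n_out, sigmoid=False):
    # sample W uniformly from +/- sqrt(6 / (n_in + n_out)), per [Xavier10]
    bound = numpy.sqrt(6. / (n_in + n_out))
    W = numpy.asarray(rng.uniform(low=-bound, high=bound, size=(n_in, n_out)),
                      dtype=numpy.float32)
    if sigmoid:
        # sigmoid units want 4x larger initial weights than tanh
        W *= 4
    return W

rng = numpy.random.RandomState(1234)
W = xavier_init(rng, n_in=784, n_out=500, sigmoid=True)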

code/rbm.py

Lines changed: 7 additions & 11 deletions
@@ -50,6 +50,13 @@ def __init__(self, input=None, n_visible=784, n_hidden=500, \
         self.n_hidden = n_hidden


+        if numpy_rng is None:
+            # create a number generator
+            numpy_rng = numpy.random.RandomState(1234)
+
+        if theano_rng is None:
+            theano_rng = RandomStreams(numpy_rng.randint(2**30))
+
         if W is None:
            # W is initialized with `initial_W`, which is uniformly sampled
            # from -4*sqrt(6./(n_visible+n_hidden)) to 4*sqrt(6./(n_hidden+n_visible))

@@ -73,13 +80,6 @@ def __init__(self, input=None, n_visible=784, n_hidden=500, \
         vbias = theano.shared(value = numpy.zeros(n_visible,
                               dtype = theano.config.floatX), name='vbias')

-        if numpy_rng is None:
-            # create a number generator
-            numpy_rng = numpy.random.RandomState(1234)
-
-        if theano_rng is None:
-            theano_rng = RandomStreams(numpy_rng.randint(2**30))
-

         # initialize input layer for standalone RBM or layer0 of DBN
         self.input = input

@@ -93,10 +93,6 @@ def __init__(self, input=None, n_visible=784, n_hidden=500, \
         # **** WARNING: It is not a good idea to put things in this list
         # other than shared variables created in this function.
         self.params = [self.W, self.hbias, self.vbias]
-        # cast batch_size to floatX, because its type is int64,
-        # and otherwise the gradients are upcasted to float64,
-        # even when floatX == float32
-        self.batch_size = T.cast(self.input.shape[0], dtype = theano.config.floatX)


     def free_energy(self, v_sample):
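The first two hunks are a pure reordering: the `if W is None` branch samples `initial_W` from `numpy_rng`, so the RNG defaults must be created before that branch runs rather than after. A NumPy-only sketch of that dependency; the function name and keyword defaults are illustrative assumptions:

import numpy

def default_rbm_weights(n_visible=784, n_hidden=500, numpy_rng=None, W=None):
    # the RNG default must come first: sampling W below depends on it
    if numpy_rng is None:
        numpy_rng = numpy.random.RandomState(1234)
    if W is None:
        # uniform in +/- 4*sqrt(6./(n_visible+n_hidden)), as in the tutorial
        bound = 4. * numpy.sqrt(6. / (n_visible + n_hidden))
        W = numpy.asarray(numpy_rng.uniform(low=-bound, high=bound,
                                            size=(n_visible, n_hidden)),
                          dtype=numpy.float64)
    return numpy_rng, W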

doc/mlp.txt

Lines changed: 6 additions & 16 deletions
@@ -161,22 +161,12 @@ both upward (activations flowing from inputs to outputs) and backward
         # For example, results presented in [Xavier10]_ suggest that you
         # should use 4 times larger initial weights for sigmoid
         # compared to tanh
-        if activation == theano.tensor.tanh:
-            W_values = numpy.asarray( rng.uniform(
-                low = - numpy.sqrt(6./(n_in+n_out)),
-                high = numpy.sqrt(6./(n_in+n_out)),
-                size = (n_in, n_out)), dtype = theano.config.floatX)
-        elif activation == theano.tensor.nnet.sigmoid:
-            W_values = numpy.asarray( 4*rng.uniform(
-                low = - numpy.sqrt(6./(n_in+n_out)),
-                high = numpy.sqrt(6./(n_in+n_out)),
-                size = (n_in, n_out)), dtype = theano.config.floatX)
-        else:
-            # how should we initialize the weights for your activation function ?
-            W_values = numpy.asarray( rng.uniform(
-                low = - numpy.sqrt(6./(n_in+n_out)),
-                high = numpy.sqrt(6./(n_in+n_out)),
-                size = (n_in, n_out)), dtype = theano.config.floatX)
+        W_values = numpy.asarray( rng.uniform(
+            low = - numpy.sqrt(6./(n_in+n_out)),
+            high = numpy.sqrt(6./(n_in+n_out)),
+            size = (n_in, n_out)), dtype = theano.config.floatX)
+        if activation == theano.tensor.nnet.sigmoid:
+            W_values *= 4

         self.W = theano.shared(value = W_values, name ='W')
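For concreteness: with n_in = 784 and n_out = 500, the tanh bound is sqrt(6/1284) ≈ 0.068, and the 4x sigmoid scaling raises it to about 0.273. The refactored code is equivalent to the old sigmoid branch, since multiplying a sample drawn uniformly from ±b by 4 is the same as drawing it uniformly from ±4b.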

doc/rbm.txt

Lines changed: 7 additions & 11 deletions
@@ -344,6 +344,13 @@ corresponding sigmoidal layer of an MLP network.
         self.n_hidden = n_hidden


+        if numpy_rng is None:
+            # create a number generator
+            numpy_rng = numpy.random.RandomState(1234)
+
+        if theano_rng is None:
+            theano_rng = RandomStreams(numpy_rng.randint(2**30))
+
         if W is None:
            # W is initialized with `initial_W`, which is uniformly sampled
            # from -4.*sqrt(6./(n_visible+n_hidden)) to 4.*sqrt(6./(n_hidden+n_visible))

@@ -367,13 +374,6 @@ corresponding sigmoidal layer of an MLP network.
         vbias = theano.shared(value = numpy.zeros(n_visible,
                               dtype = theano.config.floatX), name='vbias')

-        if numpy_rng is None:
-            # create a number generator
-            numpy_rng = numpy.random.RandomState(1234)
-
-        if theano_rng is None:
-            theano_rng = RandomStreams(numpy_rng.randint(2**30))
-

         # initialize input layer for standalone RBM or layer0 of DBN
         self.input = input if input else T.dmatrix('input')

@@ -385,10 +385,6 @@ corresponding sigmoidal layer of an MLP network.
         # **** WARNING: It is not a good idea to put things in this list
         # other than shared variables created in this function.
         self.params = [self.W, self.hbias, self.vbias]
-        # cast batch_size to floatX, because its type is int64,
-        # and otherwise the gradients are upcasted to float64,
-        # even when floatX == float32
-        self.batch_size = T.cast(self.input.shape[0], dtype = theano.config.floatX)


 Next step is to define functions which construct the symbolic graph associated
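The final hunk in both files drops self.batch_size from the RBM. The deleted comment still documents a useful Theano pattern when dividing by a symbolic batch size; here is a minimal sketch of it (the variable names are illustrative, not from the tutorial):

import theano
import theano.tensor as T

input = T.matrix('input')
# input.shape[0] is int64; dividing a float32 expression by it would
# upcast the result (and its gradients) to float64 even when
# floatX == float32, so cast it to floatX first
batch_size = T.cast(input.shape[0], dtype=theano.config.floatX)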
