diff --git a/code/DBN.py b/code/DBN.py
index 27781e93..70554537 100644
--- a/code/DBN.py
+++ b/code/DBN.py
@@ -8,7 +8,8 @@
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
 
-from logistic_sgd import LogisticRegression, load_data
+from logistic_sgd import LogisticRegression 
+from load_names import load_data
 from mlp import HiddenLayer
 from rbm import RBM
 
diff --git a/code/SdA.py b/code/SdA.py
old mode 100644
new mode 100755
index f6d04df6..679e1cd1
--- a/code/SdA.py
+++ b/code/SdA.py
@@ -1,3 +1,6 @@
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
+
 """
  This tutorial introduces stacked denoising auto-encoders (SdA) using Theano.
 
@@ -36,9 +39,11 @@
 import theano.tensor as T
 from theano.tensor.shared_randomstreams import RandomStreams
 
-from logistic_sgd import LogisticRegression, load_data
+from logistic_sgd import LogisticRegression
+from load_mp3 import *
 from mlp import HiddenLayer
 from dA import dA
+from dAau import dAau
 
 
 
@@ -138,12 +143,13 @@ def __init__(self, numpy_rng, theano_rng = None, n_ins = 784,
             self.params.extend(sigmoid_layer.params)
         
             # Construct a denoising autoencoder that shared weights with this
-            # layer
-            dA_layer = dA(numpy_rng = numpy_rng, theano_rng = theano_rng, input = layer_input, 
-                          n_visible = input_size, 
-                          n_hidden  = hidden_layers_sizes[i],  
-                          W = sigmoid_layer.W, bhid = sigmoid_layer.b)
-            self.dA_layers.append(dA_layer)        
+            # layer, use dAau for the first layer
+            dAclass = (dA, dAau)[i == 0]
+            dA_layer = dAclass(numpy_rng = numpy_rng, theano_rng = theano_rng, input = layer_input, 
+                              n_visible = input_size, 
+                              n_hidden  = hidden_layers_sizes[i],  
+                              W = sigmoid_layer.W, bhid = sigmoid_layer.b)
+            self.dA_layers.append(dA_layer)                    
 
         
         # We now need to add a logistic layer on top of the MLP
@@ -319,7 +325,7 @@ def test_SdA( finetune_lr = 0.1, pretraining_epochs = 15, \
     numpy_rng = numpy.random.RandomState(89677)
     print '... building the model'
     # construct the stacked denoising autoencoder class
-    sda = SdA( numpy_rng = numpy_rng, n_ins = 28*28, 
+    sda = SdA( numpy_rng = numpy_rng, n_ins = S_DIM * F_DIM * X_DIM, 
                       hidden_layers_sizes = [1000,1000,1000],
                       n_outs = 10)
     
@@ -351,6 +357,8 @@ def test_SdA( finetune_lr = 0.1, pretraining_epochs = 15, \
 
     print >> sys.stderr, ('The pretraining code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((end_time-start_time)/60.))
     
+    
+    return
     ########################
     # FINETUNING THE MODEL #
     ########################
@@ -434,6 +442,8 @@ def test_SdA( finetune_lr = 0.1, pretraining_epochs = 15, \
 
 
 if __name__ == '__main__':
-    test_SdA()
-
+    d = "/home/dmitry/mp3/01- Hitchhikers Guide to the Galaxy"
+    dataset = sorted([os.path.join(d, f) for f in os.listdir(d)])
+    dataset = ["/home/dmitry/mp3/hhgttg0101006048k.mp3"]
+    test_SdA(dataset = dataset[:1])
 
diff --git a/code/SdAext.py b/code/SdAext.py
new file mode 100644
index 00000000..e152a389
--- /dev/null
+++ b/code/SdAext.py
@@ -0,0 +1,439 @@
+"""
+ This tutorial introduces stacked denoising auto-encoders (SdA) using Theano.
+
+ Denoising autoencoders are the building blocks for SdA. 
+ They are based on auto-encoders as the ones used in Bengio et al. 2007.
+ An autoencoder takes an input x and first maps it to a hidden representation
+ y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting 
+ latent representation y is then mapped back to a "reconstructed" vector 
+ z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b').  The weight 
+ matrix W' can optionally be constrained such that W' = W^T, in which case 
+ the autoencoder is said to have tied weights. The network is trained such 
+ that to minimize the reconstruction error (the error between x and z).
+
+ For the denosing autoencoder, during training, first x is corrupted into 
+ \tilde{x}, where \tilde{x} is a partially destroyed version of x by means 
+ of a stochastic mapping. Afterwards y is computed as before (using 
+ \tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction 
+ error is now measured between z and the uncorrupted input x, which is 
+ computed as the cross-entropy : 
+      - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]
+
+
+ References :
+   - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and 
+   Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103,
+   2008
+   - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
+   Training of Deep Networks, Advances in Neural Information Processing 
+   Systems 19, 2007
+
+"""
+
+import numpy, time, cPickle, gzip, sys, os
+
+import theano
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+from logistic_sgd import LogisticRegression, load_data
+from mlp import HiddenLayer
+from dAext import dA
+
+
+
+class SdA(object):
+    """Stacked denoising auto-encoder class (SdA)
+
+    A stacked denoising autoencoder model is obtained by stacking several
+    dAs. The hidden layer of the dA at layer `i` becomes the input of 
+    the dA at layer `i+1`. The first layer dA gets as input the input of 
+    the SdA, and the hidden layer of the last dA represents the output. 
+    Note that after pretraining, the SdA is dealt with as a normal MLP, 
+    the dAs are only used to initialize the weights.
+    """
+
+    def __init__(self, numpy_rng, theano_rng = None, n_ins = 784, 
+                 hidden_layers_sizes = [500,500], n_outs = 10, 
+                 corruption_levels = [0.1, 0.1]):
+        """ This class is made to support a variable number of layers. 
+
+        :type numpy_rng: numpy.random.RandomState
+        :param numpy_rng: numpy random number generator used to draw initial 
+                    weights
+
+        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
+        :param theano_rng: Theano random generator; if None is given one is 
+                           generated based on a seed drawn from `rng`
+
+        :type n_ins: int
+        :param n_ins: dimension of the input to the sdA
+
+        :type n_layers_sizes: list of ints
+        :param n_layers_sizes: intermediate layers size, must contain 
+                               at least one value
+
+        :type n_outs: int
+        :param n_outs: dimension of the output of the network
+        
+        :type corruption_levels: list of float
+        :param corruption_levels: amount of corruption to use for each 
+                                  layer
+        """
+        
+        self.sigmoid_layers = []
+        self.dA_layers      = []
+        self.params         = []
+        self.n_layers       = len(hidden_layers_sizes)
+
+        assert self.n_layers > 0
+
+        if not theano_rng:
+            theano_rng = RandomStreams(numpy_rng.randint(2**30))
+        # allocate symbolic variables for the data
+        self.x  = T.matrix('x')  # the data is presented as rasterized images
+        self.y  = T.ivector('y') # the labels are presented as 1D vector of 
+                                 # [int] labels
+
+        # The SdA is an MLP, for which all weights of intermediate layers
+        # are shared with a different denoising autoencoders 
+        # We will first construct the SdA as a deep multilayer perceptron,
+        # and when constructing each sigmoidal layer we also construct a 
+        # denoising autoencoder that shares weights with that layer 
+        # During pretraining we will train these autoencoders (which will
+        # lead to chainging the weights of the MLP as well)
+        # During finetunining we will finish training the SdA by doing 
+        # stochastich gradient descent on the MLP
+
+        for i in xrange( self.n_layers ):
+            # construct the sigmoidal layer
+
+            # the size of the input is either the number of hidden units of 
+            # the layer below or the input size if we are on the first layer
+            if i == 0 :
+                input_size = n_ins
+            else:
+                input_size = hidden_layers_sizes[i-1]
+
+            # the input to this layer is either the activation of the hidden
+            # layer below or the input of the SdA if you are on the first
+            # layer
+            if i == 0 : 
+                layer_input = self.x
+            else:
+                layer_input = self.sigmoid_layers[-1].output
+
+            sigmoid_layer = HiddenLayer(rng   = numpy_rng, 
+                                           input = layer_input, 
+                                           n_in  = input_size, 
+                                           n_out = hidden_layers_sizes[i],
+                                           activation = T.nnet.sigmoid)
+            # add the layer to our list of layers 
+            self.sigmoid_layers.append(sigmoid_layer)
+            # its arguably a philosophical question...
+            # but we are going to only declare that the parameters of the 
+            # sigmoid_layers are parameters of the StackedDAA
+            # the visible biases in the dA are parameters of those
+            # dA, but not the SdA
+            self.params.extend(sigmoid_layer.params)
+        
+            # Construct a denoising autoencoder that shared weights with this
+            # layer
+            dA_layer = dA(numpy_rng = numpy_rng, theano_rng = theano_rng, input = layer_input, 
+                          n_visible = input_size, 
+                          n_hidden  = hidden_layers_sizes[i],  
+                          W = sigmoid_layer.W, bhid = sigmoid_layer.b)
+            self.dA_layers.append(dA_layer)        
+
+        
+        # We now need to add a logistic layer on top of the MLP
+        self.logLayer = LogisticRegression(\
+                         input = self.sigmoid_layers[-1].output,\
+                         n_in = hidden_layers_sizes[-1], n_out = n_outs)
+
+        self.params.extend(self.logLayer.params)
+        # construct a function that implements one step of finetunining
+
+        # compute the cost for second phase of training, 
+        # defined as the negative log likelihood 
+        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
+        # compute the gradients with respect to the model parameters
+        # symbolic variable that points to the number of errors made on the
+        # minibatch given by self.x and self.y
+        self.errors = self.logLayer.errors(self.y)
+
+    def pretraining_functions(self, train_set_x, batch_size):
+        ''' Generates a list of functions, each of them implementing one 
+        step in trainnig the dA corresponding to the layer with same index.
+        The function will require as input the minibatch index, and to train
+        a dA you just need to iterate, calling the corresponding function on 
+        all minibatch indexes.
+
+        :type train_set_x: theano.tensor.TensorType
+        :param train_set_x: Shared variable that contains all datapoints used
+                            for training the dA
+
+        :type batch_size: int
+        :param batch_size: size of a [mini]batch
+
+        :type learning_rate: float
+        :param learning_rate: learning rate used during training for any of 
+                              the dA layers
+        '''
+
+        # index to a [mini]batch
+        index            = T.lscalar('index')   # index to a minibatch
+        corruption_level = T.scalar('corruption')    # amount of corruption to use
+        learning_rate    = T.scalar('lr')    # learning rate to use
+        # number of batches
+        n_batches = train_set_x.value.shape[0] / batch_size
+        # begining of a batch, given `index`
+        batch_begin = index * batch_size
+        # ending of a batch given `index`
+        batch_end = batch_begin+batch_size
+
+        pretrain_fns = []
+        for dA in self.dA_layers:
+            # get the cost and the updates list
+            cost,updates = dA.get_cost_updates( corruption_level, learning_rate)
+            # compile the theano function    
+            fn = theano.function( inputs = [index, 
+                              theano.Param(corruption_level, default = 0.2),
+                              theano.Param(learning_rate, default = 0.1)], 
+                    outputs = cost, 
+                    updates = updates,
+                    givens  = {self.x :train_set_x[batch_begin:batch_end]})
+            # append `fn` to the list of functions
+            pretrain_fns.append(fn)
+
+        return pretrain_fns
+ 
+
+    def build_finetune_functions(self, datasets, batch_size, learning_rate):
+        '''Generates a function `train` that implements one step of 
+        finetuning, a function `validate` that computes the error on 
+        a batch from the validation set, and a function `test` that 
+        computes the error on a batch from the testing set
+
+        :type datasets: list of pairs of theano.tensor.TensorType
+        :param datasets: It is a list that contain all the datasets;  
+                         the has to contain three pairs, `train`, 
+                         `valid`, `test` in this order, where each pair
+                         is formed of two Theano variables, one for the 
+                         datapoints, the other for the labels
+
+        :type batch_size: int
+        :param batch_size: size of a minibatch
+
+        :type learning_rate: float
+        :param learning_rate: learning rate used during finetune stage
+        '''
+
+        (train_set_x, train_set_y) = datasets[0]
+        (valid_set_x, valid_set_y) = datasets[1]
+        (test_set_x , test_set_y ) = datasets[2]
+
+        # compute number of minibatches for training, validation and testing
+        n_valid_batches = valid_set_x.value.shape[0] / batch_size
+        n_test_batches  = test_set_x.value.shape[0]  / batch_size
+
+        index   = T.lscalar('index')    # index to a [mini]batch 
+
+        # compute the gradients with respect to the model parameters
+        gparams = T.grad(self.finetune_cost, self.params)
+
+        # compute list of fine-tuning updates
+        updates = {}
+        for param, gparam in zip(self.params, gparams):
+            updates[param] = param - gparam*learning_rate
+
+        train_fn = theano.function(inputs = [index], 
+              outputs =   self.finetune_cost, 
+              updates = updates,
+              givens  = {
+                self.x : train_set_x[index*batch_size:(index+1)*batch_size],
+                self.y : train_set_y[index*batch_size:(index+1)*batch_size]})
+
+        test_score_i = theano.function([index], self.errors,
+                 givens = {
+                   self.x: test_set_x[index*batch_size:(index+1)*batch_size],
+                   self.y: test_set_y[index*batch_size:(index+1)*batch_size]})
+
+        valid_score_i = theano.function([index], self.errors,
+              givens = {
+                 self.x: valid_set_x[index*batch_size:(index+1)*batch_size],
+                 self.y: valid_set_y[index*batch_size:(index+1)*batch_size]})
+
+        # Create a function that scans the entire validation set
+        def valid_score():
+            return [valid_score_i(i) for i in xrange(n_valid_batches)]
+
+        # Create a function that scans the entire test set
+        def test_score():
+            return [test_score_i(i) for i in xrange(n_test_batches)]
+
+        return train_fn, valid_score, test_score
+
+
+
+
+
+
+def test_SdA( finetune_lr = 0.4, pretraining_epochs = 25, \
+              pretrain_lr = 0.4, training_epochs = 25, \
+              dataset='../data/mnist.pkl.gz', batch_size = 200):
+    """
+    Demonstrates how to train and test a stochastic denoising autoencoder.
+
+    This is demonstrated on MNIST.
+
+    :type learning_rate: float
+    :param learning_rate: learning rate used in the finetune stage 
+    (factor for the stochastic gradient)
+
+    :type pretraining_epochs: int
+    :param pretraining_epochs: number of epoch to do pretraining
+
+    :type pretrain_lr: float
+    :param pretrain_lr: learning rate to be used during pre-training
+
+    :type n_iter: int
+    :param n_iter: maximal number of iterations ot run the optimizer 
+
+    :type dataset: string
+    :param dataset: path the the pickled dataset
+
+    """
+
+    datasets = load_data(dataset)
+
+    train_set_x, train_set_y = datasets[0]
+    valid_set_x, valid_set_y = datasets[1]
+    test_set_x , test_set_y  = datasets[2]
+
+
+    # compute number of minibatches for training, validation and testing
+    n_train_batches = train_set_x.value.shape[0] / batch_size
+
+    # numpy random generator
+    numpy_rng = numpy.random.RandomState(89677)
+    print '... building the model'
+    # construct the stacked denoising autoencoder class
+    sda = SdA( numpy_rng = numpy_rng, n_ins = 28*28, 
+                      hidden_layers_sizes = [1000,1000,1000],
+                      n_outs = 10)
+    
+
+    #########################
+    # PRETRAINING THE MODEL #
+    #########################
+    print '... getting the pretraining functions'
+    pretraining_fns = sda.pretraining_functions( 
+                                        train_set_x   = train_set_x, 
+                                        batch_size    = batch_size ) 
+
+    print '... pre-training the model'
+    start_time = time.clock()  
+    ## Pre-train layer-wise 
+    corruption_levels = [.10,.10,.10]
+    for i in xrange(sda.n_layers):
+        # go through pretraining epochs 
+        for epoch in xrange(pretraining_epochs):
+            # go through the training set
+            c = []
+            for batch_index in xrange(n_train_batches):
+                c.append( pretraining_fns[i](index = batch_index, 
+                         corruption = corruption_levels[i], 
+                         lr = pretrain_lr ) )
+            print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),numpy.mean(c)
+ 
+    end_time = time.clock()
+
+    print >> sys.stderr, ('The pretraining code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((end_time-start_time)/60.))
+    
+    ########################
+    # FINETUNING THE MODEL #
+    ########################
+
+    # get the training, validation and testing function for the model
+    print '... getting the finetuning functions'
+    train_fn, validate_model, test_model = sda.build_finetune_functions ( 
+                datasets = datasets, batch_size = batch_size, 
+                learning_rate = finetune_lr) 
+
+    print '... finetunning the model'
+    # early-stopping parameters
+    patience              = 10*n_train_batches # look as this many examples regardless
+    patience_increase     = 2.    # wait this much longer when a new best is 
+                                  # found
+    improvement_threshold = 0.995 # a relative improvement of this much is 
+                                  # considered significant
+    validation_frequency  = min(n_train_batches, patience/2)
+                                  # go through this many 
+                                  # minibatche before checking the network 
+                                  # on the validation set; in this case we 
+                                  # check every epoch 
+
+
+    best_params          = None
+    best_validation_loss = float('inf')
+    test_score           = 0.
+    start_time = time.clock()
+
+    done_looping = False
+    epoch = 0
+
+    while (epoch < training_epochs) and (not done_looping):
+        for minibatch_index in xrange(n_train_batches):
+            minibatch_avg_cost = train_fn(minibatch_index)
+            iter    = epoch * n_train_batches + minibatch_index
+
+            if (iter+1) % validation_frequency == 0:
+                validation_losses = validate_model()
+                this_validation_loss = numpy.mean(validation_losses)
+                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
+                   (epoch, minibatch_index+1, n_train_batches, \
+                    this_validation_loss*100.))
+
+
+                # if we got the best validation score until now
+                if this_validation_loss < best_validation_loss:
+
+                    #improve patience if loss improvement is good enough
+                    if this_validation_loss < best_validation_loss *  \
+                                                improvement_threshold :
+                        patience = max(patience, iter * patience_increase)
+
+                    # save best validation score and iteration number
+                    best_validation_loss = this_validation_loss
+                    best_iter = iter
+
+                    # test it on the test set
+                    test_losses = test_model()
+                    test_score = numpy.mean(test_losses)
+                    print(('     epoch %i, minibatch %i/%i, test error of best '
+                          'model %f %%') % 
+                             (epoch, minibatch_index+1, n_train_batches,
+                              test_score*100.))
+
+
+            if patience <= iter :
+                done_looping = True
+                break
+        epoch = epoch + 1
+
+    end_time = time.clock()
+    print(('Optimization complete with best validation score of %f %%,'
+           'with test performance %f %%') %  
+                 (best_validation_loss * 100., test_score*100.))
+    print >> sys.stderr, ('The training code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((end_time-start_time)/60.))
+
+
+
+
+
+
+if __name__ == '__main__':
+    test_SdA()
+
+
diff --git a/code/dA_plots/filters_corruption_0.png b/code/dA_plots/filters_corruption_0.png
new file mode 100644
index 00000000..eaffcb63
Binary files /dev/null and b/code/dA_plots/filters_corruption_0.png differ
diff --git a/code/dA_plots/filters_corruption_0_145860_25.png b/code/dA_plots/filters_corruption_0_145860_25.png
new file mode 100644
index 00000000..eaede07e
Binary files /dev/null and b/code/dA_plots/filters_corruption_0_145860_25.png differ
diff --git a/code/dA_plots/filters_corruption_261900x100.png b/code/dA_plots/filters_corruption_261900x100.png
new file mode 100644
index 00000000..05f5028b
Binary files /dev/null and b/code/dA_plots/filters_corruption_261900x100.png differ
diff --git a/code/dA_plots/filters_corruption_261900x500.png b/code/dA_plots/filters_corruption_261900x500.png
new file mode 100644
index 00000000..b8bd6f5a
Binary files /dev/null and b/code/dA_plots/filters_corruption_261900x500.png differ
diff --git a/code/dA_plots/filters_corruption_30.png b/code/dA_plots/filters_corruption_30.png
new file mode 100644
index 00000000..4efeab23
Binary files /dev/null and b/code/dA_plots/filters_corruption_30.png differ
diff --git a/code/dA_plots/filters_corruption_30_145860_25.png b/code/dA_plots/filters_corruption_30_145860_25.png
new file mode 100644
index 00000000..56484ca5
Binary files /dev/null and b/code/dA_plots/filters_corruption_30_145860_25.png differ
diff --git a/code/dA_plots/log_dA_145860_100.txt b/code/dA_plots/log_dA_145860_100.txt
new file mode 100644
index 00000000..efd98d1d
--- /dev/null
+++ b/code/dA_plots/log_dA_145860_100.txt
@@ -0,0 +1,217 @@
+Using gpu device 0: GeForce GTX 470
+... loading data
+frames = 150741 TRAIN =  145860
+input =  288
+[[ 0.4999986   0.50001061  0.50002503 ...,  0.50001675  0.4999994
+   0.50003493]
+ [ 0.50000131  0.49999517  0.5000155  ...,  0.50000322  0.5000034   0.500085  ]
+ [ 0.50001192  0.50001007  0.49999484 ...,  0.50000703  0.49997738
+   0.49995062]
+ ..., 
+ [ 0.50036383  0.50014853  0.50039202 ...,  0.50093889  0.49868685
+   0.50046623]
+ [ 0.50023872  0.5000689   0.50005275 ...,  0.4986825   0.49972135
+   0.50064117]
+ [ 0.50001383  0.49999261  0.49988553 ...,  0.49990761  0.4990716   0.499295  ]]
+Training epoch 0, cost  4.18604124889
+Training epoch 1, cost  2.63120056261
+Training epoch 2, cost  2.63757145122
+Training epoch 3, cost  2.64306500026
+Training epoch 4, cost  2.64316516051
+Training epoch 5, cost  2.63991771862
+Training epoch 6, cost  2.62174934441
+Training epoch 7, cost  2.59528159494
+Training epoch 8, cost  2.55827880759
+Training epoch 9, cost  2.53036221591
+Training epoch 10, cost  2.50064140057
+Training epoch 11, cost  2.47210054804
+Training epoch 12, cost  2.4462136745
+Training epoch 13, cost  2.42688381625
+Training epoch 14, cost  2.40377438126
+Training epoch 15, cost  2.38560681047
+Training epoch 16, cost  2.36972873175
+Training epoch 17, cost  2.35224353618
+Training epoch 18, cost  2.3381953907
+Training epoch 19, cost  2.32559930113
+Training epoch 20, cost  2.31283797391
+Training epoch 21, cost  2.30497753625
+Training epoch 22, cost  2.2933128835
+Training epoch 23, cost  2.28335845374
+Training epoch 24, cost  2.27583143725
+Training epoch 25, cost  2.26858936866
+Training epoch 26, cost  2.2620628835
+Training epoch 27, cost  2.25544882507
+Training epoch 28, cost  2.24985216989
+Training epoch 29, cost  2.24279462113
+Training epoch 30, cost  2.23837725923
+Training epoch 31, cost  2.23438450719
+Training epoch 32, cost  2.22929628582
+Training epoch 33, cost  2.22418556857
+Training epoch 34, cost  2.22108354651
+Training epoch 35, cost  2.21707713625
+Training epoch 36, cost  2.21313044722
+Training epoch 37, cost  2.20998544195
+Training epoch 38, cost  2.20594635909
+Training epoch 39, cost  2.20226011351
+Training epoch 40, cost  2.19935948319
+Training epoch 41, cost  2.19585360641
+Training epoch 42, cost  2.19397091496
+Training epoch 43, cost  2.19066093986
+Training epoch 44, cost  2.18775321263
+Training epoch 45, cost  2.18528214662
+Training epoch 46, cost  2.18255545772
+Training epoch 47, cost  2.17975271129
+Training epoch 48, cost  2.17756940517
+Training epoch 49, cost  2.17515029395
+Training epoch 50, cost  2.17346497712
+Training epoch 51, cost  2.17163852538
+Training epoch 52, cost  2.16883912656
+Training epoch 53, cost  2.16715702343
+Training epoch 54, cost  2.16592885515
+Training epoch 55, cost  2.16338802512
+Training epoch 56, cost  2.16145538744
+Training epoch 57, cost  2.15952435662
+Training epoch 58, cost  2.15784774356
+Training epoch 59, cost  2.15603829764
+Training epoch 60, cost  2.15435646232
+Training epoch 61, cost  2.15263258111
+Training epoch 62, cost  2.15083598999
+Training epoch 63, cost  2.15009710724
+Training epoch 64, cost  2.14758011214
+Training epoch 65, cost  2.14686091329
+Training epoch 66, cost  2.14493577424
+Training epoch 67, cost  2.1434399317
+Training epoch 68, cost  2.14217293124
+Training epoch 69, cost  2.14082746298
+Training epoch 70, cost  2.14039013223
+Training epoch 71, cost  2.13873454211
+Training epoch 72, cost  2.13668072124
+Training epoch 73, cost  2.13539363516
+Training epoch 74, cost  2.13416707374
+Training epoch 75, cost  2.1334285927
+Training epoch 76, cost  2.13237208946
+Training epoch 77, cost  2.13115155372
+Training epoch 78, cost  2.13028988092
+Training epoch 79, cost  2.12927421843
+Training epoch 80, cost  2.12782417108
+Training epoch 81, cost  2.12766683378
+Training epoch 82, cost  2.12608502481
+Training epoch 83, cost  2.12487319283
+Training epoch 84, cost  2.12353334854
+Training epoch 85, cost  2.12343814274
+Training epoch 86, cost  2.12165467422
+Training epoch 87, cost  2.12083879662
+Training epoch 88, cost  2.12023890631
+Training epoch 89, cost  2.11913366199
+Training epoch 90, cost  2.11807983683
+TrainiThe no corruption code for file dA.py ran for 24.39m
+ng epoch 91, cost  2.11759590746
+Training epoch 92, cost  2.1165943049
+Training epoch 93, cost  2.11573370334
+Training epoch 94, cost  2.11396201837
+Training epoch 95, cost  2.11389439681
+Training epoch 96, cost  2.11330334417
+Training epoch 97, cost  2.11206620432
+Training epoch 98, cost  2.11148198079
+Training epoch 99, cost  2.11097475212
+Training epoch 0, cost  4.07315742621
+Training epoch 1, cost  2.63372867176
+Training epoch 2, cost  2.63366493341
+Training epoch 3, cost  2.63492845237
+Training epoch 4, cost  2.63488212155
+Training epoch 5, cost  2.63581650427
+Training epoch 6, cost  2.63493782565
+Training epoch 7, cost  2.63661135892
+Training epoch 8, cost  2.63508913727
+Training epoch 9, cost  2.64286842906
+Training epoch 10, cost  2.6315966509
+Training epoch 11, cost  2.63017191142
+Training epoch 12, cost  2.62661327643
+Training epoch 13, cost  2.61729274861
+Training epoch 14, cost  2.60820387486
+Training epoch 15, cost  2.58270934029
+Training epoch 16, cost  2.56930366662
+Training epoch 17, cost  2.56869226056
+Training epoch 18, cost  2.54020309501
+Training epoch 19, cost  2.5361950779
+Training epoch 20, cost  2.51913034117
+Training epoch 21, cost  2.51773961333
+Training epoch 22, cost  2.50552434698
+Training epoch 23, cost  2.4884221171
+Training epoch 24, cost  2.48178690182
+Training epoch 25, cost  2.47160644196
+Training epoch 26, cost  2.46567690122
+Training epoch 27, cost  2.45171150847
+Training epoch 28, cost  2.4494054123
+Training epoch 29, cost  2.43754392054
+Training epoch 30, cost  2.43117892371
+Training epoch 31, cost  2.42459244952
+Training epoch 32, cost  2.42241275881
+Training epoch 33, cost  2.41123283885
+Training epoch 34, cost  2.4087931064
+Training epoch 35, cost  2.40255183695
+Training epoch 36, cost  2.39680654823
+Training epoch 37, cost  2.39457945547
+Training epoch 38, cost  2.38670643082
+Training epoch 39, cost  2.38559850842
+Training epoch 40, cost  2.37964459713
+Training epoch 41, cost  2.37848150624
+Training epoch 42, cost  2.37343921397
+Training epoch 43, cost  2.36909215172
+Training epoch 44, cost  2.36754368487
+Training epoch 45, cost  2.36483909014
+Training epoch 46, cost  2.35920761989
+Training epoch 47, cost  2.35824672417
+Training epoch 48, cost  2.35610613345
+Training epoch 49, cost  2.35196448007
+Training epoch 50, cost  2.3485084156
+Training epoch 51, cost  2.34958795035
+Training epoch 52, cost  2.34640746049
+Training epoch 53, cost  2.34146131136
+Training epoch 54, cost  2.34219296329
+Training epoch 55, cost  2.33889945838
+Training epoch 56, cost  2.33597767123
+Training epoch 57, cost  2.33485046663
+Training epoch 58, cost  2.33332931621
+Training epoch 59, cost  2.33153473365
+Training epoch 60, cost  2.32852537322
+Training epoch 61, cost  2.32647182015
+Training epoch 62, cost  2.3264766407
+Training epoch 63, cost  2.32504975876
+Training epoch 64, cost  2.32362501928
+Training epoch 65, cost  2.31910093583
+Training epoch 66, cost  2.3193325899
+Training epoch 67, cost  2.31832188186
+Training epoch 68, cost  2.31731010258
+Training epoch 69, cost  2.31425896408
+Training epoch 70, cost  2.31354820119
+Training epoch 71, cost  2.3129555417
+Training epoch 72, cost  2.31029593874
+Training epoch 73, cost  2.31095340781
+Training epoch 74, cost  2.30849117947
+Training epoch 75, cost  2.3075484947
+Training epoch 76, cost  2.30678684826
+Training epoch 77, cost  2.30503109788
+Training epoch 78, cost  2.30244166067
+Training epoch 79, cost  2.30401851519
+Training epoch 80, cost  2.30116019856
+Training epoch 81, cost  2.30084793423
+Training epoch 82, cost  2.29852175245
+Training epoch 83, cost  2.29725287733
+Training epoch 84, cost  2.29584233169
+Training epoch 85, cost  2.29692454451
+Training epoch 86, cost  2.29505444005
+Training epoch 87, cost  2.29498963047
+Training epoch 88, cost  2.29420147102
+Training epoch 89, cost  2.29307078491
+Training epoch 90, cost  2.29191733512
+Training epoch 91, cost  2.29119800237
+Training epoch 92, cost  2.28983753685
+Training epoch 93, cost  2.29037208196
+Training epoch 94, cost  2.28776630845
+Training epoch 95, cost  2.28908740616
+Training epoch 96, cost  2.2The 30% corruption code for file dA.py ran for 23.59m
+8792886801
+Training epoch 97, cost  2.2865686703
+Training epoch 98, cost  2.28574542798
+Training epoch 99, cost  2.28418035702
diff --git a/code/dAau.py b/code/dAau.py
new file mode 100755
index 00000000..d9401551
--- /dev/null
+++ b/code/dAau.py
@@ -0,0 +1,355 @@
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
+
+"""
+ This tutorial introduces denoising auto-encoders (dA) using Theano.
+
+ Denoising autoencoders are the building blocks for SdA. 
+ They are based on auto-encoders as the ones used in Bengio et al. 2007.
+ An autoencoder takes an input x and first maps it to a hidden representation
+ y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting 
+ latent representation y is then mapped back to a "reconstructed" vector 
+ z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b').  The weight 
+ matrix W' can optionally be constrained such that W' = W^T, in which case 
+ the autoencoder is said to have tied weights. The network is trained such 
+ that to minimize the reconstruction error (the error between x and z).
+
+ For the denosing autoencoder, during training, first x is corrupted into 
+ \tilde{x}, where \tilde{x} is a partially destroyed version of x by means 
+ of a stochastic mapping. Afterwards y is computed as before (using 
+ \tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction 
+ error is now measured between z and the uncorrupted input x, which is 
+ computed as the cross-entropy : 
+      - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]
+
+
+ References :
+   - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and 
+   Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103,
+   2008
+   - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
+   Training of Deep Networks, Advances in Neural Information Processing 
+   Systems 19, 2007
+
+"""
+
+import numpy, time, cPickle, gzip, sys, os
+
+import theano
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+from load_mp3 import *
+from utils import tile_raster_images
+
+import PIL.Image
+
+
+class dAau(object):
+    """Denoising Auto-Encoder class (dAau) 
+
+    A denoising autoencoders tries to reconstruct the input from a corrupted 
+    version of it by projecting it first in a latent space and reprojecting 
+    it afterwards back in the input space. Please refer to Vincent et al.,2008
+    for more details. If x is the input then equation (1) computes a partially
+    destroyed version of x by means of a stochastic mapping q_D. Equation (2) 
+    computes the projection of the input into the latent space. Equation (3) 
+    computes the reconstruction of the input, while equation (4) computes the 
+    reconstruction error.
+  
+    .. math::
+
+        \tilde{x} ~ q_D(\tilde{x}|x)                                     (1)
+
+        y = s(W \tilde{x} + b)                                           (2)
+
+        x = s(W' y  + b')                                                (3)
+
+        L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)]      (4)
+
+    """
+
+    def __init__(self, numpy_rng, theano_rng = None, input = None, n_visible= 784, n_hidden= 500, 
+               W = None, bhid = None, bvis = None):
+        """
+        Initialize the dA class by specifying the number of visible units (the 
+        dimension d of the input ), the number of hidden units ( the dimension 
+        d' of the latent or hidden space ) and the corruption level. The 
+        constructor also receives symbolic variables for the input, weights and 
+        bias. Such a symbolic variables are useful when, for example the input is 
+        the result of some computations, or when weights are shared between the 
+        dA and an MLP layer. When dealing with SdAs this always happens,
+        the dA on layer 2 gets as input the output of the dA on layer 1, 
+        and the weights of the dA are used in the second stage of training 
+        to construct an MLP.
+
+        :type numpy_rng: numpy.random.RandomState
+        :param numpy_rng: number random generator used to generate weights
+
+        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
+        :param theano_rng: Theano random generator; if None is given one is generated
+                     based on a seed drawn from `rng`
+
+        :type input: theano.tensor.TensorType
+        :param input: a symbolic description of the input or None for standalone
+                      dA
+
+        :type n_visible: int
+        :param n_visible: number of visible units
+
+        :type n_hidden: int
+        :param n_hidden:  number of hidden units
+
+        :type W: theano.tensor.TensorType
+        :param W: Theano variable pointing to a set of weights that should be 
+                  shared belong the dA and another architecture; if dA should 
+                  be standalone set this to None
+
+        :type bhid: theano.tensor.TensorType
+        :param bhid: Theano variable pointing to a set of biases values (for 
+                     hidden units) that should be shared belong dA and another 
+                     architecture; if dA should be standalone set this to None
+
+        :type bvis: theano.tensor.TensorType
+        :param bvis: Theano variable pointing to a set of biases values (for 
+                     visible units) that should be shared belong dA and another
+                     architecture; if dA should be standalone set this to None
+
+
+        """
+        self.n_visible = n_visible
+        self.n_hidden  = n_hidden
+
+        # create a Theano random generator that gives symbolic random values
+        if not theano_rng : 
+            theano_rng = RandomStreams(rng.randint(2**30))
+
+        # note : W' was written as `W_prime` and b' as `b_prime`
+        if not W:
+            # W is initialized with `initial_W` which is uniformely sampled
+            # from -4*sqrt(6./(n_visible+n_hidden)) and
+            # 4*sqrt(6./(n_hidden+n_visible))the output of uniform if
+            # converted using asarray to dtype
+            # theano.config.floatX so that the code is runable on GPU
+            initial_W = numpy.asarray( numpy_rng.uniform( 
+                      low  = -4*numpy.sqrt(6./(n_hidden+n_visible)), 
+                      high =  4*numpy.sqrt(6./(n_hidden+n_visible)), 
+                      size = (n_visible, n_hidden)), dtype = theano.config.floatX)
+            W = theano.shared(value = initial_W, name ='W')
+
+        if not bvis:
+            bvis = theano.shared(value = numpy.zeros(n_visible, 
+                                         dtype = theano.config.floatX))
+
+        if not bhid:
+            bhid = theano.shared(value = numpy.zeros(n_hidden,
+                                dtype = theano.config.floatX), name ='b')
+
+
+        self.W = W
+        # b corresponds to the bias of the hidden 
+        self.b = bhid
+        # b_prime corresponds to the bias of the visible
+        self.b_prime = bvis
+        # tied weights, therefore W_prime is W transpose
+        self.W_prime = self.W.T 
+        self.theano_rng = theano_rng
+        # if no input is given, generate a variable representing the input
+        if input == None : 
+            # we use a matrix because we expect a minibatch of several examples,
+            # each example being a row
+            self.x = T.dmatrix(name = 'input') 
+        else:
+            self.x = input
+
+        self.params = [self.W, self.b, self.b_prime]
+
+    def get_corrupted_input(self, input, corruption_level):
+        """ This function keeps ``1-corruption_level`` entries of the inputs the same 
+        and zero-out randomly selected subset of size ``coruption_level`` 
+        Note : first argument of theano.rng.binomial is the shape(size) of 
+               random numbers that it should produce
+               second argument is the number of trials 
+               third argument is the probability of success of any trial
+        
+                this will produce an array of 0s and 1s where 1 has a probability of 
+                1 - ``corruption_level`` and 0 with ``corruption_level``
+
+                The binomial function return int64 data type by default. 
+                int64 multiplicated by the input type(floatX) always return float64.
+                To keep all data in floatX when floatX is float32, we set the dtype
+                of the binomial to floatX. As in our case the value of the binomial 
+                is always 0 or 1, this don't change the result. This is needed to allow
+                the gpu to work correctly as it only support float32 for now.
+        """
+        corruption = self.theano_rng.binomial( size = input.shape, n = 1, p =  1 - corruption_level, dtype=theano.config.floatX)
+        return  input * corruption + (1 - corruption) * 0.5
+
+    
+    def get_hidden_values(self, input):
+        """ Computes the values of the hidden layer """
+        return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
+
+    def get_reconstructed_input(self, hidden ):
+        """ Computes the reconstructed input given the values of the hidden layer """
+        return  T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
+    
+    def get_cost_updates(self, corruption_level, learning_rate):
+        """ This function computes the cost and the updates for one trainng
+        step of the dA """
+
+        tilde_x = self.get_corrupted_input(self.x, corruption_level)
+        y       = self.get_hidden_values( tilde_x)
+        z       = self.get_reconstructed_input(y)
+        # note : we sum over the size of a datapoint; if we are using minibatches,
+        #        L will  be a vector, with one entry per example in minibatch
+
+        # L = - T.sum( self.x*T.log(z) + (1-self.x)*T.log(1-z), axis=1 )
+        L = T.sum( T.abs_(z - self.x), axis=1 )
+
+        # note : L is now a vector, where each element is the cross-entropy cost 
+        #        of the reconstruction of the corresponding example of the 
+        #        minibatch. We need to compute the average of all these to get 
+        #        the cost of the minibatch
+        cost = T.mean(L)
+
+        # compute the gradients of the cost of the `dA` with respect
+        # to its parameters 
+        gparams = T.grad(cost, self.params)
+        # generate the list of updates
+        updates = {}
+        for param, gparam in zip(self.params, gparams):
+            updates[param] = param -  learning_rate*gparam
+    
+        return (cost, updates)
+
+
+
+
+
+from load_mp3 import *
+
+def test_dA( learning_rate = 0.1, training_epochs = 15, dataset ='/home/dmitry/mp3/hhgttg01010060.mp3',
+        batch_size = 20, output_folder = 'dA_plots' ):
+
+    """
+    This demo is tested on /home/dmitry/mp3/hhgttg01010060.mp2
+
+    :type learning_rate: float
+    :param learning_rate: learning rate used for training the DeNosing AutoEncoder
+
+    :type training_epochs: int
+    :param training_epochs: number of epochs used for training 
+
+    :type dataset: string
+    :param dataset: path to the picked dataset
+
+    """
+    datasets = load_data(dataset)
+    train_set_x, train_set_y = datasets[0]
+
+    # compute number of minibatches for training, validation and testing
+    n_train_batches = train_set_x.value.shape[0] / batch_size
+
+    # allocate symbolic variables for the data
+    index = T.lscalar()    # index to a [mini]batch 
+    x     = T.matrix('x')  # the data is presented as rasterized images
+
+    
+    if not os.path.isdir(output_folder):
+        os.makedirs(output_folder)
+    os.chdir(output_folder)
+    ####################################
+    # BUILDING THE MODEL NO CORRUPTION #
+    ####################################
+
+    rng        = numpy.random.RandomState(123)
+    theano_rng = RandomStreams( rng.randint(2**30))
+
+    da = dA(numpy_rng = rng, theano_rng = theano_rng, input = x,
+            n_visible = S_DIM * F_DIM * X_DIM, n_hidden = 1000)
+
+    cost, updates = da.get_cost_updates(corruption_level = 0.,
+                                learning_rate = learning_rate)
+
+    
+    train_da = theano.function([index], cost, updates = updates,
+         givens = {x:train_set_x[index*batch_size:(index+1)*batch_size]})
+    
+    start_time = time.clock()
+
+    ############
+    # TRAINING #
+    ############
+
+    # go through training epochs
+    for epoch in xrange(training_epochs):
+        # go through trainng set
+        c = []
+        for batch_index in xrange(n_train_batches):
+            c.append(train_da(batch_index))
+
+        print 'Training epoch %d, cost '%epoch, numpy.mean(c)
+
+    end_time = time.clock()
+
+    training_time = (end_time - start_time)
+
+    print >> sys.stderr, ('The no corruption code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((training_time)/60.))
+    image = PIL.Image.fromarray(tile_raster_images( X = da.W.value.T,
+                 img_shape = (S_DIM, F_DIM * X_DIM),tile_shape = (10,10), 
+                 tile_spacing=(1,1)))
+    image.save('filters_corruption_0.png') 
+ 
+    #####################################
+    # BUILDING THE MODEL CORRUPTION 30% #
+    #####################################
+
+    rng        = numpy.random.RandomState(123)
+    theano_rng = RandomStreams( rng.randint(2**30))
+
+    da = dAau(numpy_rng = rng, theano_rng = theano_rng, input = x,
+            n_visible = S_DIM * F_DIM * X_DIM, n_hidden = 1000)
+
+    cost, updates = da.get_cost_updates(corruption_level = 0.3,
+                                learning_rate = learning_rate)
+
+    
+    train_da = theano.function([index], cost, updates = updates,
+         givens = {x:train_set_x[index*batch_size:(index+1)*batch_size]})
+
+    start_time = time.clock()
+
+    ############
+    # TRAINING #
+    ############
+
+    # go through training epochs
+    for epoch in xrange(training_epochs):
+        # go through trainng set
+        c = []
+        for batch_index in xrange(n_train_batches):
+            c.append(train_da(batch_index))
+
+        print 'Training epoch %d, cost '%epoch, numpy.mean(c)
+
+    end_time = time.clock()
+
+    training_time = (end_time - start_time)
+
+    print >> sys.stderr, ('The 30% corruption code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % (training_time/60.))
+
+    image = PIL.Image.fromarray(tile_raster_images( X = da.W.value.T,
+                 img_shape = (S_DIM, F_DIM * X_DIM),tile_shape = (10,10), 
+                 tile_spacing=(1,1)))
+    image.save('filters_corruption_30.png') 
+ 
+    os.chdir('../')
+
+
+if __name__ == "__main__":    
+    d = "/home/dmitry/mp3/01- Hitchhikers Guide to the Galaxy"
+    dataset = sorted([os.path.join(d, f) for f in os.listdir(d)])
+    #dataset = ["/home/dmitry/mp3/hhgttg01010060.mp3"]
+    test_dA(dataset = dataset[:5], training_epochs = 100)
+
diff --git a/code/dAext.py b/code/dAext.py
new file mode 100644
index 00000000..26b3b228
--- /dev/null
+++ b/code/dAext.py
@@ -0,0 +1,375 @@
+"""
+ This tutorial introduces denoising auto-encoders (dA) using Theano.
+
+ Denoising autoencoders are the building blocks for SdA. 
+ They are based on auto-encoders as the ones used in Bengio et al. 2007.
+ An autoencoder takes an input x and first maps it to a hidden representation
+ y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting 
+ latent representation y is then mapped back to a "reconstructed" vector 
+ z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b').  The weight 
+ matrix W' can optionally be constrained such that W' = W^T, in which case 
+ the autoencoder is said to have tied weights. The network is trained such 
+ that to minimize the reconstruction error (the error between x and z).
+
+ For the denosing autoencoder, during training, first x is corrupted into 
+ \tilde{x}, where \tilde{x} is a partially destroyed version of x by means 
+ of a stochastic mapping. Afterwards y is computed as before (using 
+ \tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction 
+ error is now measured between z and the uncorrupted input x, which is 
+ computed as the cross-entropy : 
+      - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]
+
+
+ References :
+   - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and 
+   Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103,
+   2008
+   - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
+   Training of Deep Networks, Advances in Neural Information Processing 
+   Systems 19, 2007
+
+"""
+
+import numpy, time, cPickle, gzip, sys, os
+
+import theano
+import theano.tensor as T
+from theano.tensor.shared_randomstreams import RandomStreams
+
+from logistic_sgd import load_data
+from utils import tile_raster_images
+
+import PIL.Image
+
+
+class dA(object):
+    """Denoising Auto-Encoder class (dA) 
+
+    A denoising autoencoders tries to reconstruct the input from a corrupted 
+    version of it by projecting it first in a latent space and reprojecting 
+    it afterwards back in the input space. Please refer to Vincent et al.,2008
+    for more details. If x is the input then equation (1) computes a partially
+    destroyed version of x by means of a stochastic mapping q_D. Equation (2) 
+    computes the projection of the input into the latent space. Equation (3) 
+    computes the reconstruction of the input, while equation (4) computes the 
+    reconstruction error.
+  
+    .. math::
+
+        \tilde{x} ~ q_D(\tilde{x}|x)                                     (1)
+
+        y = s(W \tilde{x} + b)                                           (2)
+
+        x = s(W' y  + b')                                                (3)
+
+        L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)]      (4)
+
+    """
+
+    def __init__(self, numpy_rng, theano_rng = None, input = None, n_visible= 784, n_hidden= 500, 
+               W = None, bhid = None, bvis = None):
+        """
+        Initialize the dA class by specifying the number of visible units (the 
+        dimension d of the input ), the number of hidden units ( the dimension 
+        d' of the latent or hidden space ) and the corruption level. The 
+        constructor also receives symbolic variables for the input, weights and 
+        bias. Such a symbolic variables are useful when, for example the input is 
+        the result of some computations, or when weights are shared between the 
+        dA and an MLP layer. When dealing with SdAs this always happens,
+        the dA on layer 2 gets as input the output of the dA on layer 1, 
+        and the weights of the dA are used in the second stage of training 
+        to construct an MLP.
+
+        :type numpy_rng: numpy.random.RandomState
+        :param numpy_rng: number random generator used to generate weights
+
+        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
+        :param theano_rng: Theano random generator; if None is given one is generated
+                     based on a seed drawn from `rng`
+
+        :type input: theano.tensor.TensorType
+        :param input: a symbolic description of the input or None for standalone
+                      dA
+
+        :type n_visible: int
+        :param n_visible: number of visible units
+
+        :type n_hidden: int
+        :param n_hidden:  number of hidden units
+
+        :type W: theano.tensor.TensorType
+        :param W: Theano variable pointing to a set of weights that should be 
+                  shared belong the dA and another architecture; if dA should 
+                  be standalone set this to None
+
+        :type bhid: theano.tensor.TensorType
+        :param bhid: Theano variable pointing to a set of biases values (for 
+                     hidden units) that should be shared belong dA and another 
+                     architecture; if dA should be standalone set this to None
+
+        :type bvis: theano.tensor.TensorType
+        :param bvis: Theano variable pointing to a set of biases values (for 
+                     visible units) that should be shared belong dA and another
+                     architecture; if dA should be standalone set this to None
+
+
+        """
+        self.n_visible = n_visible
+        self.n_hidden  = n_hidden
+
+        # create a Theano random generator that gives symbolic random values
+        if not theano_rng : 
+            theano_rng = RandomStreams(rng.randint(2**30))
+
+        # note : W' was written as `W_prime` and b' as `b_prime`
+        if not W:
+            # W is initialized with `initial_W` which is uniformely sampled
+            # from -4*sqrt(6./(n_visible+n_hidden)) and
+            # 4*sqrt(6./(n_hidden+n_visible))the output of uniform if
+            # converted using asarray to dtype
+            # theano.config.floatX so that the code is runable on GPU
+            initial_W = numpy.asarray( numpy_rng.uniform( 
+                      low  = -4*numpy.sqrt(6./(n_hidden+n_visible)), 
+                      high =  4*numpy.sqrt(6./(n_hidden+n_visible)), 
+                      size = (n_visible, n_hidden)), dtype = theano.config.floatX)
+            W = theano.shared(value = initial_W, name ='W')
+
+        if not bvis:
+            bvis = theano.shared(value = numpy.zeros(n_visible, 
+                                         dtype = theano.config.floatX))
+
+        if not bhid:
+            bhid = theano.shared(value = numpy.zeros(n_hidden,
+                                dtype = theano.config.floatX), name ='b')
+
+
+        self.W = W
+        # b corresponds to the bias of the hidden 
+        self.b = bhid
+        # b_prime corresponds to the bias of the visible
+        self.b_prime = bvis
+        # tied weights, therefore W_prime is W transpose
+        self.W_prime = self.W.T 
+        self.theano_rng = theano_rng
+        # if no input is given, generate a variable representing the input
+        if input == None : 
+            # we use a matrix because we expect a minibatch of several examples,
+            # each example being a row
+            self.x = T.dmatrix(name = 'input') 
+        else:
+            self.x = input
+
+        self.params = [self.W, self.b, self.b_prime]
+
+    def get_corrupted_input(self, input, corruption_level):
+        """ This function keeps ``1-corruption_level`` entries of the inputs the same 
+        and zero-out randomly selected subset of size ``coruption_level`` 
+        Note : first argument of theano.rng.binomial is the shape(size) of 
+               random numbers that it should produce
+               second argument is the number of trials 
+               third argument is the probability of success of any trial
+        
+                this will produce an array of 0s and 1s where 1 has a probability of 
+                1 - ``corruption_level`` and 0 with ``corruption_level``
+
+                The binomial function return int64 data type by default. 
+                int64 multiplicated by the input type(floatX) always return float64.
+                To keep all data in floatX when floatX is float32, we set the dtype
+                of the binomial to floatX. As in our case the value of the binomial 
+                is always 0 or 1, this don't change the result. This is needed to allow
+                the gpu to work correctly as it only support float32 for now.
+        """
+        
+        n = input.shape[0]
+        shuffled = T.permute_row_elements(input.T, self.theano_rng.permutation(n=n)).T
+        d = T.cast(self.n_visible * corruption_level, 'int32')
+        start = self.theano_rng.random_integers(low = 0, high = self.n_visible - d) 
+        end = start + d
+        corrupted = T.set_subtensor(input[:, start:end], shuffled[:, start:end])
+        return  self.theano_rng.binomial( size = input.shape, n = 1, p =  1 - corruption_level, dtype=theano.config.floatX) * corrupted
+
+    
+    def get_hidden_values(self, input):
+        """ Computes the values of the hidden layer """
+        return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
+
+    def get_reconstructed_input(self, hidden ):
+        """ Computes the reconstructed input given the values of the hidden layer """
+        return  T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
+    
+    def get_cost_updates(self, corruption_level, learning_rate):
+        """ This function computes the cost and the updates for one trainng
+        step of the dA """
+
+        tilde_x = self.get_corrupted_input(self.x, corruption_level)
+        y       = self.get_hidden_values( tilde_x)
+        z       = self.get_reconstructed_input(y)
+        # note : we sum over the size of a datapoint; if we are using minibatches,
+        #        L will  be a vector, with one entry per example in minibatch
+        L = - T.sum( self.x*T.log(z) + (1-self.x)*T.log(1-z), axis=1 ) 
+        # note : L is now a vector, where each element is the cross-entropy cost 
+        #        of the reconstruction of the corresponding example of the 
+        #        minibatch. We need to compute the average of all these to get 
+        #        the cost of the minibatch
+        cost = T.mean(L)
+
+        # compute the gradients of the cost of the `dA` with respect
+        # to its parameters 
+        gparams = T.grad(cost, self.params)
+        # generate the list of updates
+        updates = {}
+        for param, gparam in zip(self.params, gparams):
+            updates[param] = param -  learning_rate*gparam
+    
+        return (cost, updates)
+
+
+
+
+def test_dA( learning_rate = 0.1, training_epochs = 15, dataset ='../data/mnist.pkl.gz',
+        batch_size = 200, output_folder = 'dA_plots' ):
+
+    """
+    This demo is tested on MNIST
+
+    :type learning_rate: float
+    :param learning_rate: learning rate used for training the DeNosing AutoEncoder
+
+    :type training_epochs: int
+    :param training_epochs: number of epochs used for training 
+
+    :type dataset: string
+    :param dataset: path to the picked dataset
+
+    """
+    datasets = load_data(dataset)
+    train_set_x, train_set_y = datasets[0]
+
+    # compute number of minibatches for training, validation and testing
+    n_train_batches = train_set_x.value.shape[0] / batch_size
+
+    # allocate symbolic variables for the data
+    index = T.lscalar()    # index to a [mini]batch 
+    x     = T.matrix('x')  # the data is presented as rasterized images
+
+    
+    if not os.path.isdir(output_folder):
+        os.makedirs(output_folder)
+    os.chdir(output_folder)
+
+    ####################################
+    # PLOT GOOD/CORRUPTED INPUT        #
+    ####################################
+    rng        = numpy.random.RandomState(123)
+    theano_rng = RandomStreams( rng.randint(2**30))
+    da = dA(numpy_rng = rng, theano_rng = theano_rng, input = x,
+            n_visible = 28*28, n_hidden = 500)
+    
+    
+    #X = numpy.array(train_set_x.value[0:batch_size])
+    #for x in X:
+    #    d = 28*28//5
+    #    j = numpy.random.randint(28*28 - d)
+    #    x[j:j+d] = X[numpy.random.randint(len(X))][j:j+d]
+
+    
+    corrupt_input = theano.function([],
+                               [da.get_corrupted_input(input = x, corruption_level = 0.3)], 
+                               givens = {x:train_set_x[0:batch_size]})
+    X = corrupt_input()[0]
+    image = PIL.Image.fromarray(tile_raster_images( X = X,
+                 img_shape = (28,28),tile_shape = (10,10), 
+                 tile_spacing=(1,1), scale_rows_to_unit_interval = False))
+    image.save('input_corruption_30.png') 
+
+    
+    ####################################
+    # BUILDING THE MODEL NO CORRUPTION #
+    ####################################
+
+    rng        = numpy.random.RandomState(123)
+    theano_rng = RandomStreams( rng.randint(2**30))
+
+    da = dA(numpy_rng = rng, theano_rng = theano_rng, input = x,
+            n_visible = 28*28, n_hidden = 500)
+
+    cost, updates = da.get_cost_updates(corruption_level = 0.,
+                                learning_rate = learning_rate)
+
+    
+    train_da = theano.function([index], cost, updates = updates,
+         givens = {x:train_set_x[index*batch_size:(index+1)*batch_size]})
+    
+    start_time = time.clock()
+
+    ############
+    # TRAINING #
+    ############
+
+    # go through training epochs
+    for epoch in xrange(training_epochs):
+        # go through trainng set
+        c = []
+        for batch_index in xrange(n_train_batches):
+            c.append(train_da(batch_index))
+
+        print 'Training epoch %d, cost '%epoch, numpy.mean(c)
+
+    end_time = time.clock()
+
+    training_time = (end_time - start_time)
+
+    print >> sys.stderr, ('The no corruption code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((training_time)/60.))
+    image = PIL.Image.fromarray(tile_raster_images( X = da.W.value.T,
+                 img_shape = (28,28),tile_shape = (10,10), 
+                 tile_spacing=(1,1)))
+    image.save('filters_corruption_0.png') 
+ 
+    #####################################
+    # BUILDING THE MODEL CORRUPTION 30% #
+    #####################################
+
+    rng        = numpy.random.RandomState(123)
+    theano_rng = RandomStreams( rng.randint(2**30))
+
+    da = dA(numpy_rng = rng, theano_rng = theano_rng, input = x,
+            n_visible = 28*28, n_hidden = 500)
+
+    cost, updates = da.get_cost_updates(corruption_level = 0.3,
+                                learning_rate = learning_rate)
+
+    
+    train_da = theano.function([index], cost, updates = updates,
+         givens = {x:train_set_x[index*batch_size:(index+1)*batch_size]})
+
+    start_time = time.clock()
+
+    ############
+    # TRAINING #
+    ############
+
+    # go through training epochs
+    for epoch in xrange(training_epochs):
+        # go through trainng set
+        c = []
+        for batch_index in xrange(n_train_batches):
+            c.append(train_da(batch_index))
+
+        print 'Training epoch %d, cost '%epoch, numpy.mean(c)
+
+    end_time = time.clock()
+
+    training_time = (end_time - start_time)
+
+    print >> sys.stderr, ('The 30% corruption code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % (training_time/60.))
+
+    image = PIL.Image.fromarray(tile_raster_images( X = da.W.value.T,
+                 img_shape = (28,28),tile_shape = (10,10), 
+                 tile_spacing=(1,1)))
+    image.save('filters_corruption_30.png') 
+ 
+    os.chdir('../')
+
+
+if __name__ == '__main__':
+    test_dA()
diff --git a/code/kNN.py b/code/kNN.py
new file mode 100755
index 00000000..f35b71a9
--- /dev/null
+++ b/code/kNN.py
@@ -0,0 +1,139 @@
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
+
+"""
+ Naive kNN test/benchmarks.
+"""
+
+from __future__ import division
+import numpy, time, cPickle, gzip, sys, os, heapq
+import theano
+import theano.tensor as T
+from operator import itemgetter
+from utils import tile_raster_images
+import PIL.Image
+
+# Needs: ../data/mnist.pkl.gz
+from logistic_sgd import load_data
+
+
+
+def test_kNN( dataset ='../data/mnist.pkl.gz',
+              k = 9,
+              distance = T.sqr,
+              batch_size = 10000,
+              train_set_size = float('infinity'),
+              test_set_size = 1000,
+              img_shape = (28, 28),
+              output_folder = 'plots' ):
+
+    """
+    This demo is tested on '../data/mnist.pkl.gz'
+
+    :type batch_size: int
+    :param batch_size: batch_size used for training 
+
+    :type dataset: string
+    :param dataset: path to the picked dataset
+
+    """
+    
+    from theano import ProfileMode
+    profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
+
+    
+    # load data
+    ((train_set_x, train_set_y), (valid_set_x, valid_set_y), 
+        (test_set_x , test_set_y )) = load_data(dataset)
+        
+    # compute number of minibatches for training, validation and testing
+    train_set_size = min(train_set_x.value.shape[0], train_set_size)
+    test_set_size = min(test_set_x.value.shape[0], test_set_size)
+    n_train_batches = train_set_size / batch_size
+
+    # 
+    assert train_set_size > 0
+    assert test_set_size > 0
+    
+    # adjust img_shape    
+    assert train_set_x.value[0].shape[0] == img_shape[0] * img_shape[1]
+
+
+    # allocate symbolic variables for the data
+    indexMB = T.lscalar()    # index to a [mini]batch 
+    indexT  = T.lscalar()    # index to a test set item
+    a     = T.matrix('a')  # the data is presented as vectors
+    b     = T.vector('b')  # the data is presented as vectors
+    
+    
+    if not os.path.isdir(output_folder):
+        os.makedirs(output_folder)
+    os.chdir(output_folder)
+
+    #############################################
+    # Exhaustive kNN queries for the test set   #
+    #############################################
+    
+    # define theano.function
+    DistancesF = T.sum( distance(a - b), axis=1 )        
+    calc_distances = theano.function([indexMB, indexT], DistancesF,
+         givens = {a:train_set_x[indexMB*batch_size:(indexMB+1)*batch_size], 
+                   b:test_set_x[indexT]},
+         mode=profmode)
+    
+    # go through test set
+    start_time = time.clock()
+    kNN = []
+    for test_index in xrange(test_set_size):
+        c = []
+        for batch_index in xrange(n_train_batches):
+            batchDistances = calc_distances(batch_index, test_index)
+            batchKNN = [(batch_index * batch_size + i, batchDistances[i]) \
+                            for i in batchDistances.argsort()[:k] ]
+            c.extend(batchKNN)
+            
+        c.sort(key=itemgetter(1))
+        kNN.append(c[:k])
+        
+        # print out time/ETA
+        if(test_index and not test_index % 100):
+            test_time = time.clock() - start_time + 0.00000001; 
+            ETA = ((test_time / test_index) * test_set_size - test_time); 
+            print "%.1f seconds; test %d (%d); ETA %.1f seconds" % (test_time, test_index, test_set_size, ETA),
+            print "Gdists/sec = %f (3 flop/dist)" % (28*28*train_set_size*test_index / 1000000000 / test_time)
+        
+    end_time = time.clock()
+    
+    #for t in kNN: print t
+    training_time = (end_time - start_time)
+    profmode.print_summary()
+    print >> sys.stderr, ('The kNN search code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((training_time)/60.))
+    
+    
+    ### FIXME, why using test_set_x.value[i] directly eats memory? 
+    ###        the whole thing gets transferred from the GPU?
+    #  
+    # X = [train_set_x.value[i] for i in xrange(train_set_x.value.shape[0])]
+     
+    #############################################
+    # Plot Results                              #
+    #############################################
+    tN = 100; X = []
+    for test_index in xrange(test_set_size):
+        X.append(numpy.array(test_set_x.value[test_index]))                          
+        for i in kNN[test_index]: X.append(numpy.array(train_set_x.value[i[0]]))
+        if len(X) >= tN: break
+    
+    plot_file_name = os.path.split(dataset)[1] + "-kNN.png"
+    PIL.Image.fromarray( tile_raster_images( numpy.asarray(X),
+                              img_shape = img_shape, tile_shape = (tN//(1+k),10),
+                              tile_spacing=(1,1), scale_rows_to_unit_interval = False,
+                              output_pixel_vals = True)).save(plot_file_name)
+    print >> sys.stderr, ("Produced plot %s/%s (1T%dNN)" % (output_folder, plot_file_name, k)) 
+    
+    os.chdir('../')
+
+
+if __name__ == "__main__":    
+    test_kNN()
+
diff --git a/code/load_distances.py b/code/load_distances.py
new file mode 100755
index 00000000..d14ee7f1
--- /dev/null
+++ b/code/load_distances.py
@@ -0,0 +1,68 @@
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
+from __future__ import division
+
+import mad, numpy, random, os, time, cPickle, gzip, sys
+from theano.tensor.shared_randomstreams import RandomStreams
+from utils import tile_raster_images
+import matplotlib.pyplot
+import theano
+import theano.tensor as T
+
+# corpus size
+N_DIM = 576  # 576 dims
+(TRAIN, VALID, TEST) = (250000, 10, 10)
+
+   
+def load_data(dataset = ""):
+    ''' Loads the fake 'distances' dataset:
+         [[label/N, label/N, ...] : label, ...] for label in [0..N]
+
+    :type dataset: string
+    :param dataset: the path to the dataset (ignored)
+    '''
+
+    #############
+    # LOAD DATA #
+    #############
+    print '... loading data'
+
+    # make train/valid/test datasets
+    train_set = ([[i/TRAIN]*N_DIM   for i in xrange(0, TRAIN)], 
+                 [i for i in xrange(0, TRAIN)] )
+    valid_set = ([[i/VALID]*N_DIM for i in xrange(VALID)], 
+                 [i for i in xrange(VALID)] )
+    test_set = ([[i/TEST]*N_DIM for i in xrange(TEST)],
+                [i for i in xrange(TEST)] )
+    
+
+    def shared_dataset(data_xy):
+        """ Function that loads the dataset into shared variables
+        
+        The reason we store our dataset in shared variables is to allow 
+        Theano to copy it into the GPU memory (when code is run on GPU). 
+        Since copying data into the GPU is slow, copying a minibatch everytime
+        is needed (the default behaviour if the data is not in a shared 
+        variable) would lead to a large decrease in performance.
+        """
+        data_x, data_y = data_xy
+        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
+        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
+        # When storing data on the GPU it has to be stored as floats
+        # therefore we will store the labels as ``floatX`` as well
+        # (``shared_y`` does exactly that). But during our computations
+        # we need them as ints (we use labels as index, and if they are 
+        # floats it doesn't make sense) therefore instead of returning 
+        # ``shared_y`` we will have to cast it to int. This little hack
+        # lets ous get around this issue
+        return shared_x, T.cast(shared_y, 'int32')
+
+    test_set_x,  test_set_y  = shared_dataset(test_set)
+    valid_set_x, valid_set_y = shared_dataset(valid_set)
+    train_set_x, train_set_y = shared_dataset(train_set)
+    rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)]
+    return rval
+
+
+if __name__ == "__main__":    
+    load_data()
diff --git a/code/load_mnist.py b/code/load_mnist.py
new file mode 100755
index 00000000..cdee7217
--- /dev/null
+++ b/code/load_mnist.py
@@ -0,0 +1,92 @@
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
+from __future__ import division
+
+"""
+Load mnist digits
+"""
+__docformat__ = 'restructedtext en'
+
+import numpy, time, cPickle, gzip, sys, os
+from utils import tile_raster_images
+
+import theano
+import theano.tensor as T
+
+
+A_DIM = 28   
+B_DIM = 28
+N_DIM = 28*28
+
+
+
+def load_data(dataset = '../data/mnist.pkl.gz'):
+    ''' Loads the dataset
+
+    :type dataset: string
+    :param dataset: the path to the dataset (here MNIST)
+    '''
+
+    #############
+    # LOAD DATA #
+    #############
+    print '... loading data'
+
+    # Load the dataset
+    f = gzip.open(dataset,'rb')
+    train_set, valid_set, test_set = cPickle.load(f)
+    f.close()
+
+    test_set = (test_set[0][:200], test_set[1][:200])
+    print train_set[1][:10]
+
+    def shared_dataset(data_xy):
+        """ Function that loads the dataset into shared variables
+
+        The reason we store our dataset in shared variables is to allow
+        Theano to copy it into the GPU memory (when code is run on GPU).
+        Since copying data into the GPU is slow, copying a minibatch everytime
+        is needed (the default behaviour if the data is not in a shared
+        variable) would lead to a large decrease in performance.
+        """
+        data_x, data_y = data_xy
+        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
+        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
+        # When storing data on the GPU it has to be stored as floats
+        # therefore we will store the labels as ``floatX`` as well
+        # (``shared_y`` does exactly that). But during our computations
+        # we need them as ints (we use labels as index, and if they are
+        # floats it doesn't make sense) therefore instead of returning
+        # ``shared_y`` we will have to cast it to int. This little hack
+        # lets ous get around this issue
+        return shared_x, T.cast(shared_y, 'int32')
+
+    test_set_x,  test_set_y  = shared_dataset(test_set)
+    valid_set_x, valid_set_y = shared_dataset(valid_set)
+    train_set_x, train_set_y = shared_dataset(train_set)
+
+    rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)]
+    return rval
+
+
+
+if __name__ == "__main__":
+    import PIL.Image
+    ((train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)) = \
+        load_data()
+
+    tN = 2000
+    print "len(train_set_x.value.T)", len(train_set_x.value)
+    for i in xrange(1): # xrange(len(train_set_x.value)//tN):
+        X = [train_set_x.value[j] for j in xrange(i*tN, i*tN+tN)]
+        X = numpy.asarray(X)
+        arr = tile_raster_images( X,
+                                  img_shape = (A_DIM, B_DIM),tile_shape = (tN//10,10),
+                                  tile_spacing=(1,1), scale_rows_to_unit_interval = False,
+                                  output_pixel_vals = True)
+        #matplotlib.pyplot.imsave(fname = 'hhgttg010100%d.png' % i, arr = arr)#, vmin = 0.0, vmax = 1.0)
+        PIL.Image.fromarray(arr).save('mnist%d.png' % i)
+    
+    
+
+
diff --git a/code/load_mp3.py b/code/load_mp3.py
new file mode 100755
index 00000000..1a8f8a99
--- /dev/null
+++ b/code/load_mp3.py
@@ -0,0 +1,146 @@
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
+from __future__ import division
+
+import mad, numpy, random, os, time, cPickle, gzip, sys
+from theano.tensor.shared_randomstreams import RandomStreams
+from utils import tile_raster_images
+import matplotlib.pyplot
+import theano
+import theano.tensor as T
+
+F_DIM = 1   # 2  frames
+X_DIM = 36  # 36 samples
+S_DIM = 8   # 8  subbands
+
+
+#        SAMPLES [MDCTed]
+#   S  ------------------->
+#   U  |  
+#   B  |   amplitudes 
+#   B  |   in colors
+#   A  |  blue ... red
+#   N  |
+#   D  V
+#   S
+#   
+   
+def load_data(dataset):
+    ''' Loads the dataset
+
+    :type dataset: string
+    :param dataset: the path to the dataset (ignored)
+    '''
+
+    #############
+    # LOAD DATA #
+    #############
+    print '... loading data'
+    
+    
+    frames = []
+    def fCallback(mf):
+        frame = []
+        channel = 0
+        for sample in xrange(X_DIM):
+            for subband in reversed(xrange(S_DIM)):
+                frame.append((mf.subband_value(channel, sample, subband) + 1.0) / 2.0)
+                # fake it
+                #frame.append(subband / S_DIM * len(frames) / 2294)
+        frames.append(frame)
+                   
+     # frequency domain filter
+    for f in dataset:
+        mf = mad.MadFile(f)
+        mf.set_filter_callback(fCallback)
+        while mf.read():
+            pass
+    
+    # corpus size
+    (TRAIN, VALID, TEST) = (100, 1, 1)
+    SIZE = len(frames) // (TRAIN+VALID+TEST)
+    (TRAIN, VALID, TEST) = (TRAIN * SIZE, VALID * SIZE, TEST * SIZE)
+    print 'frames =', len(frames), 'TRAIN = ', TRAIN
+    
+
+    
+    corpus = []    
+    for i in xrange(TRAIN + VALID + TEST):        
+        x = frames[i * F_DIM : i * F_DIM + F_DIM]
+        # concatenate frames, set label
+        corpus.append((sum(x, []), i))            
+    
+
+    # make train/valid/test datasets
+    train_set = ([corpus[i][0] for i in xrange(0, TRAIN)], 
+                 [corpus[i][1] for i in xrange(0, TRAIN)] )
+    valid_set = ([corpus[i][0] for i in xrange(TRAIN, TRAIN + VALID)], 
+                 [corpus[i][1] for i in xrange(TRAIN, TRAIN + VALID)] )
+    test_set = ([corpus[i][0] for i in xrange(TRAIN + VALID, TRAIN + VALID + TEST)],
+                [corpus[i][1] for i in xrange(TRAIN + VALID, TRAIN + VALID + TEST)] )
+    
+    print 'input = ', len(train_set[0][0])
+    #for i in xrange(F_DIM  * X_DIM * S_DIM):
+    #    print train_set[0][0][i],
+    #    if not (i+1)%S_DIM: print
+    #print 'label =', train_set[1][0]
+
+    def shared_dataset(data_xy):
+        """ Function that loads the dataset into shared variables
+        
+        The reason we store our dataset in shared variables is to allow 
+        Theano to copy it into the GPU memory (when code is run on GPU). 
+        Since copying data into the GPU is slow, copying a minibatch everytime
+        is needed (the default behaviour if the data is not in a shared 
+        variable) would lead to a large decrease in performance.
+        """
+        data_x, data_y = data_xy
+        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
+        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
+        # When storing data on the GPU it has to be stored as floats
+        # therefore we will store the labels as ``floatX`` as well
+        # (``shared_y`` does exactly that). But during our computations
+        # we need them as ints (we use labels as index, and if they are 
+        # floats it doesn't make sense) therefore instead of returning 
+        # ``shared_y`` we will have to cast it to int. This little hack
+        # lets ous get around this issue
+        return shared_x, T.cast(shared_y, 'int32')
+
+    test_set_x,  test_set_y  = shared_dataset(test_set)
+    valid_set_x, valid_set_y = shared_dataset(valid_set)
+    train_set_x, train_set_y = shared_dataset(train_set)
+    print train_set_x.value
+    rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)]
+    return rval
+
+
+
+if __name__ == "__main__":    
+    import PIL.Image
+    d = "/home/dmitry/mp3/01- Hitchhikers Guide to the Galaxy"
+    dataset = sorted([os.path.join(d, f) for f in os.listdir(d)])
+    dataset = ["/home/dmitry/mp3/440-1k-2k.mp3"]
+
+    # lame --preset cbr 48kbit -m mono
+    ((train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)) = \
+        load_data(dataset[:1])
+        
+    #from pca import pca
+    #V,S,immean = pca(train_set_x.value[0:200])
+    #train_set_x.value = V
+    #print V
+    
+    #matplotlib.pyplot.imshow(V[0].reshape(S_DIM, F_DIM * X_DIM).T)
+    #matplotlib.pyplot.imshow(train_set_x.value[0].reshape(S_DIM, F_DIM * X_DIM), vmin = 0.0, vmax = 1.0)
+    #matplotlib.pyplot.show()
+       
+    tN = 499
+    print "len(train_set_x.value.T)", len(train_set_x.value)
+    for i in xrange(len(train_set_x.value)//tN):
+        arr = tile_raster_images( X = train_set_x.value[i*tN:i*tN+tN],
+                                  img_shape = (S_DIM, F_DIM * X_DIM),tile_shape = (tN//10,10),
+                                  tile_spacing=(1,1), scale_rows_to_unit_interval = False,
+                                  output_pixel_vals = True)
+        #matplotlib.pyplot.imsave(fname = 'hhgttg010100%d.png' % i, arr = arr)#, vmin = 0.0, vmax = 1.0)
+        PIL.Image.fromarray(arr).save('hhgttg010100%d.png' % i)
+
diff --git a/code/load_names.py b/code/load_names.py
new file mode 100755
index 00000000..fae35467
--- /dev/null
+++ b/code/load_names.py
@@ -0,0 +1,118 @@
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
+from __future__ import division
+
+import csv, string, numpy, random
+import theano
+import theano.tensor as T
+
+X_DIM = 28
+V_DIM = 28
+
+# map letters to letters 'a':['a'], 'b':['b'] ... 
+# LVECTORS = {}
+#for i in xrange(0,26): LVECTORS[chr(i + ord('a'))] = [chr(i + ord('a'))]
+#LVECTORS[' '] = [' ']
+#LVECTORS['-'] = ['-']
+#LVECTORS["'"] = ["'"]
+
+# map letters to binary vectors 'a':[1.0, 0.0, 0.0, ...], b:[0.0, 1.0, 0.0, ...] ...
+LVECTORS = {}
+for i in xrange(0,26): LVECTORS[chr(i + ord('a'))] = [(0.0, 1.0)[i == j] for j in xrange(V_DIM)]
+LVECTORS[' '] = [(0.0, 1.0)[V_DIM == j] for j in xrange(V_DIM)]
+LVECTORS['-'] = [(0.0, 1.0)[26 == j] for j in xrange(V_DIM)]
+LVECTORS["'"] = [(0.0, 1.0)[27 == j] for j in xrange(V_DIM)]
+
+
+def load_data(dataset):
+    ''' Loads the dataset
+
+    :type dataset: string
+    :param dataset: the path to the dataset (ignored)
+    '''
+
+    #############
+    # LOAD DATA #
+    #############
+    print '... loading data' 
+
+    # Using real names corpus, requires NLTK (sudo apt-get install python-nltk)
+    # >>> import nltk
+    # >>> nltk.download("names")
+    #
+    # import nltk
+    # from nltk.corpus import names
+    # names = ([(name, 1) for name in names.words('male.txt')] +
+    #         [(name, 0) for name in names.words('female.txt')])
+    
+
+    # Using fake male/female names, random letters, 5-15 in length
+    #  female ends on 'a', 'e', 'i'
+    #  male ends on 'k', 'o', 'r', 's', 't'
+    def fake():
+        gender = random.choice((0, 1))
+        name = ''.join(random.choice(string.letters) for i in xrange(random.randint(5,12)))        
+        name += random.choice((['a', 'e', 'i'], ['k', 'o', 'r', 's', 't'])[gender])
+        return name.capitalize(), gender
+    names = [fake() for i in xrange(10000)]
+    print "len(names)", len(names), names[:5]   
+    
+    # make train/valid/test datasets
+    SHIFTS = 10
+    TRAIN = 6000 * SHIFTS; VALID = 1200 * SHIFTS; TEST = 600 * SHIFTS; 
+    corpus = []
+    for i in xrange(TRAIN + VALID + TEST):
+        (word, label) = random.choice(names)
+        x = [LVECTORS[l] for l in list(word.lower())]        
+        space = X_DIM - len(word); assert(space > 0);
+        shift = random.randint(0, space//2)
+        # produce shifted vectorized word (letter vectors are concatenated)
+        v = sum([LVECTORS[' ']] * shift + x + [LVECTORS[' ']] * (space - shift), [])
+        corpus.append((v, label))
+
+    random.shuffle(corpus)
+
+    train_set = ([corpus[i][0] for i in xrange(0, TRAIN)], 
+                 [corpus[i][1] for i in xrange(0, TRAIN)] )
+    valid_set = ([corpus[i][0] for i in xrange(TRAIN, TRAIN + VALID)], 
+                 [corpus[i][1] for i in xrange(TRAIN, TRAIN + VALID)] )
+    test_set = ([corpus[i][0] for i in xrange(TRAIN + VALID, TRAIN + VALID + TEST)],
+                [corpus[i][1] for i in xrange(TRAIN + VALID, TRAIN + VALID + TEST)] )
+    
+    print 'input ='
+    for i in xrange(V_DIM * X_DIM):
+        print train_set[0][0][i],
+        if not (i+1)%V_DIM: print
+    print 'label =', train_set[1][0]
+
+    def shared_dataset(data_xy):
+        """ Function that loads the dataset into shared variables
+        
+        The reason we store our dataset in shared variables is to allow 
+        Theano to copy it into the GPU memory (when code is run on GPU). 
+        Since copying data into the GPU is slow, copying a minibatch everytime
+        is needed (the default behaviour if the data is not in a shared 
+        variable) would lead to a large decrease in performance.
+        """
+        data_x, data_y = data_xy
+        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
+        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
+        # When storing data on the GPU it has to be stored as floats
+        # therefore we will store the labels as ``floatX`` as well
+        # (``shared_y`` does exactly that). But during our computations
+        # we need them as ints (we use labels as index, and if they are 
+        # floats it doesn't make sense) therefore instead of returning 
+        # ``shared_y`` we will have to cast it to int. This little hack
+        # lets ous get around this issue
+        return shared_x, T.cast(shared_y, 'int32')
+
+    test_set_x,  test_set_y  = shared_dataset(test_set)
+    valid_set_x, valid_set_y = shared_dataset(valid_set)
+    train_set_x, train_set_y = shared_dataset(train_set)
+    print train_set_x.value
+    rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)]
+    return rval
+
+if __name__ == "__main__":
+    load_data('')
+
diff --git a/code/plots/50Hz-1kHz-2kHz.png b/code/plots/50Hz-1kHz-2kHz.png
new file mode 100644
index 00000000..cc9de6fb
Binary files /dev/null and b/code/plots/50Hz-1kHz-2kHz.png differ
diff --git a/code/plots/hhgttg0101007.png b/code/plots/hhgttg0101007.png
new file mode 100644
index 00000000..dde7f158
Binary files /dev/null and b/code/plots/hhgttg0101007.png differ
diff --git a/code/plots/mnist.pkl.gz-kNN.png b/code/plots/mnist.pkl.gz-kNN.png
new file mode 100644
index 00000000..8ac09ae7
Binary files /dev/null and b/code/plots/mnist.pkl.gz-kNN.png differ
diff --git a/code/plots/v06.png b/code/plots/v06.png
new file mode 100644
index 00000000..e6e10d77
Binary files /dev/null and b/code/plots/v06.png differ
diff --git a/code/rbm.py b/code/rbm.py
index 689b3f74..4e055f19 100644
--- a/code/rbm.py
+++ b/code/rbm.py
@@ -15,11 +15,11 @@
 from theano.tensor.shared_randomstreams import RandomStreams
 
 from utils import tile_raster_images
-from logistic_sgd import load_data
+from load_mp3 import *
 
 class RBM(object):
     """Restricted Boltzmann Machine (RBM)  """
-    def __init__(self, input=None, n_visible=784, n_hidden=500, \
+    def __init__(self, input=None, n_visible=S_DIM * F_DIM * X_DIM, n_hidden=1000, \
         W = None, hbias = None, vbias = None, numpy_rng = None, 
         theano_rng = None):
         """ 
@@ -302,7 +302,7 @@ def get_reconstruction_cost(self, updates, pre_sigmoid_nv):
 def test_rbm(learning_rate=0.1, training_epochs = 15,
              dataset='../data/mnist.pkl.gz', batch_size = 20,
              n_chains = 20, n_samples = 10, output_folder = 'rbm_plots',
-             n_hidden = 500):
+             n_hidden = 1000):
     """
     Demonstrate how to train and afterwards sample from it using Theano.
 
@@ -341,7 +341,7 @@ def test_rbm(learning_rate=0.1, training_epochs = 15,
     persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),dtype=theano.config.floatX))
 
     # construct the RBM class
-    rbm = RBM( input = x, n_visible=28*28, \
+    rbm = RBM( input = x, n_visible=S_DIM * F_DIM * X_DIM, \
                n_hidden = n_hidden, numpy_rng = rng, theano_rng = theano_rng)
 
     # get the cost and the gradient corresponding to one step of CD-15
@@ -380,7 +380,7 @@ def test_rbm(learning_rate=0.1, training_epochs = 15,
         plotting_start = time.clock()
         # Construct image from the weight matrix 
         image = PIL.Image.fromarray(tile_raster_images( X = rbm.W.value.T,
-                 img_shape = (28,28),tile_shape = (10,10), 
+                 img_shape = (S_DIM, F_DIM * X_DIM),tile_shape = (10,10), 
                  tile_spacing=(1,1)))
         image.save('filters_at_epoch_%i.png'%epoch)
         plotting_stop = time.clock()
@@ -424,25 +424,24 @@ def test_rbm(learning_rate=0.1, training_epochs = 15,
     # samples for reinitializing the state of our persistent chain
     sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]],
                                                          updates = updates)
-
-
-    # create a space to store the image for plotting ( we need to leave 
-    # room for the tile_spacing as well)
-    image_data = numpy.zeros((29*n_samples+1,29*n_chains-1),dtype='uint8')
     for idx in xrange(n_samples):
         # generate `plot_every` intermediate samples that we discard, because successive samples in the chain are too correlated
         vis_mf, vis_sample = sample_fn()
-        print ' ... plotting sample ', idx
-        image_data[29*idx:29*idx+28,:] = tile_raster_images( 
-                X = vis_mf,
-                img_shape = (28,28),
-                tile_shape = (1, n_chains),
-                tile_spacing = (1,1))
+        print ' ... plotting sample ', idx       
+        image = PIL.Image.fromarray(
+                    tile_raster_images( 
+                            X = vis_mf,
+                            img_shape = (S_DIM, F_DIM * X_DIM),
+                            tile_shape = (1, n_chains),
+                            tile_spacing = (1,1)))
+        image.save('sample-%d.png' %idx)
         # construct image
     
-    image = PIL.Image.fromarray(image_data)
-    image.save('samples.png')
     os.chdir('../')
 
 if __name__ == '__main__':
-    test_rbm()
+    from dA import dA
+    d = "/home/dmitry/mp3/01- Hitchhikers Guide to the Galaxy"
+    dataset = sorted([os.path.join(d, f) for f in os.listdir(d)])
+    #dataset = ["/home/dmitry/mp3/hhgttg0101006048k.mp3"]
+    test_rbm(dataset = dataset[:10], training_epochs = 15)
diff --git a/code/shift_mp3.py b/code/shift_mp3.py
new file mode 100644
index 00000000..90583b89
--- /dev/null
+++ b/code/shift_mp3.py
@@ -0,0 +1,182 @@
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
+from __future__ import division
+
+import mad, numpy, random, os, time, cPickle, gzip, sys
+from theano.tensor.shared_randomstreams import RandomStreams
+from utils import tile_raster_images
+import matplotlib.pyplot
+from collections import deque
+from operator import itemgetter
+import theano
+import theano.tensor as T
+
+X_DIM = 36  # 36 samples
+S_DIM = 8   # 8  subbands
+
+
+#        SAMPLES [MDCTed]
+#   S  ------------------->
+#   U  |  
+#   B  |   amplitudes 
+#   B  |   in colors
+#   A  |  blue ... red
+#   N  |
+#   D  V
+#   S
+#   
+   
+def load_mp3(filename):
+    ''' Loads the mp3 file to numpy array
+
+    :type filename: string
+    :param filename: the path to the mp3 file
+    '''
+
+    #############
+    # LOAD DATA #
+    #############
+    print '... loading data. filename = ', filename
+    mf = mad.MadFile(filename)    
+    samples = numpy.zeros((mf.frames() * X_DIM, S_DIM), dtype=theano.config.floatX) 
+    load_mp3.sample = 0;
+        
+    def fCallback(mf):
+        channel = 0
+        for frame_sample in xrange(X_DIM):
+            for subband in reversed(xrange(S_DIM)):
+                samples[load_mp3.sample,subband] = mf.subband_value(channel, frame_sample, subband)
+            load_mp3.sample += 1
+                
+     # frequency domain filter
+    mf.set_filter_callback(fCallback)
+    while mf.read():
+        pass
+    
+    print "frames = ", load_mp3.sample / X_DIM, "mf.frames() = ", mf.frames()   
+    return samples    
+
+
+def save_wave(samples, filename):
+    ''' Saves samples (numpy array) to the wave file. 
+        Uses the template mp3 file to set sampling rate. 
+
+    :type filename: string
+    :param filename: the path to the input mp3 file
+                     the path to the output .cdr file
+    '''
+    
+    save_wave.sample = 0;
+    def fCallback(mf):
+        for frame_sample in xrange(X_DIM):
+            for subband in reversed(xrange(S_DIM)):
+                mf.subband_value(0, frame_sample, subband, samples[save_wave.sample, subband])
+                mf.subband_value(1, frame_sample, subband, samples[save_wave.sample, subband])
+            for subband in reversed(xrange(S_DIM,32)):
+                mf.subband_value(0, frame_sample, subband, 0.0)
+                mf.subband_value(1, frame_sample, subband, 0.0)
+                                    
+            save_wave.sample += 1
+
+        if len(samples) == save_wave.sample: 
+            return False # to stop            
+                                                           
+     # frequency domain filter
+    mf = mad.MadFile(filename)
+    mf.set_filter_callback(fCallback)
+    
+    f = open(filename + ".cdr", 'wb')
+    while 1:
+        buffy = mf.read()
+        if buffy is None:
+            break
+        f.write(buffy)
+
+
+def shift_kNN( train_set_files,
+               test_set_file,
+               k = 9,
+               distance = T.sqr,
+               test_set_size = X_DIM*200000,
+               output_folder = 'plots' ):
+
+    #############################################
+    # Exhaustive kNN queries for the test set   #
+    #############################################
+    
+        # allocate symbolic variables for the data
+    index  = T.lscalar()    # index to a test set item
+    a = T.matrix('a')  # the data is presented as vectors
+    b = T.vector('b')  # the data is presented as vectors
+
+    # load test set
+    test_set_samples = load_mp3(test_set_file)[:test_set_size]
+    test_set_size = len(test_set_samples)
+    test_set_x = theano.shared(test_set_samples)    
+
+    
+    # go through train set    
+    kNN = [[] for test_index in xrange(test_set_size)]
+    
+    for filename in train_set_files:
+        train_set_samples = load_mp3(filename)
+        train_set_x = theano.shared(train_set_samples)
+        
+        # frames (X_DIM samples) 
+        frameQueue = deque(maxlen = X_DIM)
+        
+        # define theano.function
+        DistancesF = T.sum( distance(a - b), axis=1 )        
+        calc_distances = theano.function([index], DistancesF,
+             givens = {a:train_set_x[:], b:test_set_x[index]})
+
+        start_time = time.clock()
+        for test_index in xrange(test_set_size):
+            sampleDistances = calc_distances(test_index)
+            
+            # accumulate frame distances
+            for i, frameDistances in enumerate(frameQueue):
+                 frameDistances[:-i-1] += sampleDistances[i+1:]             
+            frameQueue.appendleft(sampleDistances)
+            
+            # find kNN frame            
+            if len(frameQueue) == X_DIM:
+                frameDistances = frameQueue.pop()
+                #fileKNN = [(filename, i, frameDistances[i]) for i in frameDistances.argsort()[:k] ]
+                #kNN[test_index].extend(fileKNN)
+                i = frameDistances[:-X_DIM].argmin()
+                kNN[test_index - X_DIM].append((filename, i, frameDistances[i]))
+                 
+
+            # print out time/ETA
+            if(test_index and not test_index % 100):
+                test_time = time.clock() - start_time + 0.00000001; 
+                ETA = ((test_time / test_index) * test_set_size - test_time); 
+                print "%.1f seconds; test %d (%d); ETA %.1f seconds" % (test_time, test_index, test_set_size, ETA)
+
+    # sort for nearest
+    for skNN in kNN: 
+        skNN.sort(key=itemgetter(2))
+        
+
+    # construct and save o file
+    X = numpy.zeros((test_set_size, S_DIM), dtype=theano.config.floatX)
+    for filename in train_set_files:
+        train_set_samples = load_mp3(filename)
+        for test_index in xrange(test_set_size - X_DIM):
+            (f, i, d) = kNN[test_index][0]
+            if f != filename: continue
+            X[test_index] = train_set_samples[i]   
+            
+    save_wave(X, filename = test_set_file)
+
+
+if __name__ == "__main__":    
+    import PIL.Image
+    d = "/home/dmitry/mp3/01- Hitchhikers Guide to the Galaxy"
+    train_set_files = sorted([os.path.join(d, f) for f in os.listdir(d) if f.endswith(".mp3")])
+    test_set_file = "/home/dmitry/mp3/hhgttg01010060.mp3"
+    test_set_file = "/home/dmitry/mp3/advice_to_little_girls_twain_alnl.mp3"
+    test_set_file = "mp3/test.mp3"
+
+    shift_kNN(train_set_files[1:2], test_set_file)
diff --git a/code/speak_mp3.sh b/code/speak_mp3.sh
new file mode 100644
index 00000000..b0f92278
--- /dev/null
+++ b/code/speak_mp3.sh
@@ -0,0 +1,4 @@
+espeak -w mp3/test.wav "This is a test." 
+sox mp3/test.wav -c 2 -r 44100 mp3/test-441s.wav
+lame -ms -cbr mp3/test-441s.wav mp3/test.mp3
+
diff --git a/code/utils.py b/code/utils.py
index 781b3d48..f6a7270a 100644
--- a/code/utils.py
+++ b/code/utils.py
@@ -19,7 +19,8 @@ def scale_to_unit_interval(ndar,eps=1e-8):
 
 
 def tile_raster_images(X, img_shape, tile_shape,tile_spacing = (0,0), 
-              scale_rows_to_unit_interval = True, output_pixel_vals = True):
+              scale_rows_to_unit_interval = True, output_pixel_vals = True,
+              transpose = False):
     """
     Transform an array with one flattened image per row, into an array in 
     which images are reshaped and layed out like tiles on a floor.
@@ -55,6 +56,9 @@ def tile_raster_images(X, img_shape, tile_shape,tile_spacing = (0,0),
     assert len(tile_shape) == 2
     assert len(tile_spacing) == 2
 
+    if transpose:
+        img_shape = tuple(reversed(img_shape))
+
     # The expression below can be re-written in a more C style as 
     # follows : 
     #
@@ -116,7 +120,10 @@ def tile_raster_images(X, img_shape, tile_shape,tile_spacing = (0,0),
                         # function
                         this_img = scale_to_unit_interval(X[tile_row * tile_shape[1] + tile_col].reshape(img_shape))
                     else:
-                        this_img = X[tile_row * tile_shape[1] + tile_col].reshape(img_shape)
+                        this_img = X[tile_row * tile_shape[1] + tile_col].reshape( img_shape )
+
+                    if transpose:
+                        this_img = this_img.T
                     # add the slice to the corresponding position in the 
                     # output array
                     c = 1