diff --git a/code/DBN.py b/code/DBN.py index 27781e93..70554537 100644 --- a/code/DBN.py +++ b/code/DBN.py @@ -8,7 +8,8 @@ import theano.tensor as T from theano.tensor.shared_randomstreams import RandomStreams -from logistic_sgd import LogisticRegression, load_data +from logistic_sgd import LogisticRegression +from load_names import load_data from mlp import HiddenLayer from rbm import RBM diff --git a/code/SdA.py b/code/SdA.py old mode 100644 new mode 100755 index f6d04df6..679e1cd1 --- a/code/SdA.py +++ b/code/SdA.py @@ -1,3 +1,6 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + """ This tutorial introduces stacked denoising auto-encoders (SdA) using Theano. @@ -36,9 +39,11 @@ import theano.tensor as T from theano.tensor.shared_randomstreams import RandomStreams -from logistic_sgd import LogisticRegression, load_data +from logistic_sgd import LogisticRegression +from load_mp3 import * from mlp import HiddenLayer from dA import dA +from dAau import dAau @@ -138,12 +143,13 @@ def __init__(self, numpy_rng, theano_rng = None, n_ins = 784, self.params.extend(sigmoid_layer.params) # Construct a denoising autoencoder that shared weights with this - # layer - dA_layer = dA(numpy_rng = numpy_rng, theano_rng = theano_rng, input = layer_input, - n_visible = input_size, - n_hidden = hidden_layers_sizes[i], - W = sigmoid_layer.W, bhid = sigmoid_layer.b) - self.dA_layers.append(dA_layer) + # layer, use dAau for the first layer + dAclass = (dA, dAau)[i == 0] + dA_layer = dAclass(numpy_rng = numpy_rng, theano_rng = theano_rng, input = layer_input, + n_visible = input_size, + n_hidden = hidden_layers_sizes[i], + W = sigmoid_layer.W, bhid = sigmoid_layer.b) + self.dA_layers.append(dA_layer) # We now need to add a logistic layer on top of the MLP @@ -319,7 +325,7 @@ def test_SdA( finetune_lr = 0.1, pretraining_epochs = 15, \ numpy_rng = numpy.random.RandomState(89677) print '... building the model' # construct the stacked denoising autoencoder class - sda = SdA( numpy_rng = numpy_rng, n_ins = 28*28, + sda = SdA( numpy_rng = numpy_rng, n_ins = S_DIM * F_DIM * X_DIM, hidden_layers_sizes = [1000,1000,1000], n_outs = 10) @@ -351,6 +357,8 @@ def test_SdA( finetune_lr = 0.1, pretraining_epochs = 15, \ print >> sys.stderr, ('The pretraining code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((end_time-start_time)/60.)) + + return ######################## # FINETUNING THE MODEL # ######################## @@ -434,6 +442,8 @@ def test_SdA( finetune_lr = 0.1, pretraining_epochs = 15, \ if __name__ == '__main__': - test_SdA() - + d = "/home/dmitry/mp3/01- Hitchhikers Guide to the Galaxy" + dataset = sorted([os.path.join(d, f) for f in os.listdir(d)]) + dataset = ["/home/dmitry/mp3/hhgttg0101006048k.mp3"] + test_SdA(dataset = dataset[:1]) diff --git a/code/SdAext.py b/code/SdAext.py new file mode 100644 index 00000000..e152a389 --- /dev/null +++ b/code/SdAext.py @@ -0,0 +1,439 @@ +""" + This tutorial introduces stacked denoising auto-encoders (SdA) using Theano. + + Denoising autoencoders are the building blocks for SdA. + They are based on auto-encoders as the ones used in Bengio et al. 2007. + An autoencoder takes an input x and first maps it to a hidden representation + y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting + latent representation y is then mapped back to a "reconstructed" vector + z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b'). The weight + matrix W' can optionally be constrained such that W' = W^T, in which case + the autoencoder is said to have tied weights. The network is trained such + that to minimize the reconstruction error (the error between x and z). + + For the denosing autoencoder, during training, first x is corrupted into + \tilde{x}, where \tilde{x} is a partially destroyed version of x by means + of a stochastic mapping. Afterwards y is computed as before (using + \tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction + error is now measured between z and the uncorrupted input x, which is + computed as the cross-entropy : + - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)] + + + References : + - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and + Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103, + 2008 + - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise + Training of Deep Networks, Advances in Neural Information Processing + Systems 19, 2007 + +""" + +import numpy, time, cPickle, gzip, sys, os + +import theano +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams + +from logistic_sgd import LogisticRegression, load_data +from mlp import HiddenLayer +from dAext import dA + + + +class SdA(object): + """Stacked denoising auto-encoder class (SdA) + + A stacked denoising autoencoder model is obtained by stacking several + dAs. The hidden layer of the dA at layer `i` becomes the input of + the dA at layer `i+1`. The first layer dA gets as input the input of + the SdA, and the hidden layer of the last dA represents the output. + Note that after pretraining, the SdA is dealt with as a normal MLP, + the dAs are only used to initialize the weights. + """ + + def __init__(self, numpy_rng, theano_rng = None, n_ins = 784, + hidden_layers_sizes = [500,500], n_outs = 10, + corruption_levels = [0.1, 0.1]): + """ This class is made to support a variable number of layers. + + :type numpy_rng: numpy.random.RandomState + :param numpy_rng: numpy random number generator used to draw initial + weights + + :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams + :param theano_rng: Theano random generator; if None is given one is + generated based on a seed drawn from `rng` + + :type n_ins: int + :param n_ins: dimension of the input to the sdA + + :type n_layers_sizes: list of ints + :param n_layers_sizes: intermediate layers size, must contain + at least one value + + :type n_outs: int + :param n_outs: dimension of the output of the network + + :type corruption_levels: list of float + :param corruption_levels: amount of corruption to use for each + layer + """ + + self.sigmoid_layers = [] + self.dA_layers = [] + self.params = [] + self.n_layers = len(hidden_layers_sizes) + + assert self.n_layers > 0 + + if not theano_rng: + theano_rng = RandomStreams(numpy_rng.randint(2**30)) + # allocate symbolic variables for the data + self.x = T.matrix('x') # the data is presented as rasterized images + self.y = T.ivector('y') # the labels are presented as 1D vector of + # [int] labels + + # The SdA is an MLP, for which all weights of intermediate layers + # are shared with a different denoising autoencoders + # We will first construct the SdA as a deep multilayer perceptron, + # and when constructing each sigmoidal layer we also construct a + # denoising autoencoder that shares weights with that layer + # During pretraining we will train these autoencoders (which will + # lead to chainging the weights of the MLP as well) + # During finetunining we will finish training the SdA by doing + # stochastich gradient descent on the MLP + + for i in xrange( self.n_layers ): + # construct the sigmoidal layer + + # the size of the input is either the number of hidden units of + # the layer below or the input size if we are on the first layer + if i == 0 : + input_size = n_ins + else: + input_size = hidden_layers_sizes[i-1] + + # the input to this layer is either the activation of the hidden + # layer below or the input of the SdA if you are on the first + # layer + if i == 0 : + layer_input = self.x + else: + layer_input = self.sigmoid_layers[-1].output + + sigmoid_layer = HiddenLayer(rng = numpy_rng, + input = layer_input, + n_in = input_size, + n_out = hidden_layers_sizes[i], + activation = T.nnet.sigmoid) + # add the layer to our list of layers + self.sigmoid_layers.append(sigmoid_layer) + # its arguably a philosophical question... + # but we are going to only declare that the parameters of the + # sigmoid_layers are parameters of the StackedDAA + # the visible biases in the dA are parameters of those + # dA, but not the SdA + self.params.extend(sigmoid_layer.params) + + # Construct a denoising autoencoder that shared weights with this + # layer + dA_layer = dA(numpy_rng = numpy_rng, theano_rng = theano_rng, input = layer_input, + n_visible = input_size, + n_hidden = hidden_layers_sizes[i], + W = sigmoid_layer.W, bhid = sigmoid_layer.b) + self.dA_layers.append(dA_layer) + + + # We now need to add a logistic layer on top of the MLP + self.logLayer = LogisticRegression(\ + input = self.sigmoid_layers[-1].output,\ + n_in = hidden_layers_sizes[-1], n_out = n_outs) + + self.params.extend(self.logLayer.params) + # construct a function that implements one step of finetunining + + # compute the cost for second phase of training, + # defined as the negative log likelihood + self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) + # compute the gradients with respect to the model parameters + # symbolic variable that points to the number of errors made on the + # minibatch given by self.x and self.y + self.errors = self.logLayer.errors(self.y) + + def pretraining_functions(self, train_set_x, batch_size): + ''' Generates a list of functions, each of them implementing one + step in trainnig the dA corresponding to the layer with same index. + The function will require as input the minibatch index, and to train + a dA you just need to iterate, calling the corresponding function on + all minibatch indexes. + + :type train_set_x: theano.tensor.TensorType + :param train_set_x: Shared variable that contains all datapoints used + for training the dA + + :type batch_size: int + :param batch_size: size of a [mini]batch + + :type learning_rate: float + :param learning_rate: learning rate used during training for any of + the dA layers + ''' + + # index to a [mini]batch + index = T.lscalar('index') # index to a minibatch + corruption_level = T.scalar('corruption') # amount of corruption to use + learning_rate = T.scalar('lr') # learning rate to use + # number of batches + n_batches = train_set_x.value.shape[0] / batch_size + # begining of a batch, given `index` + batch_begin = index * batch_size + # ending of a batch given `index` + batch_end = batch_begin+batch_size + + pretrain_fns = [] + for dA in self.dA_layers: + # get the cost and the updates list + cost,updates = dA.get_cost_updates( corruption_level, learning_rate) + # compile the theano function + fn = theano.function( inputs = [index, + theano.Param(corruption_level, default = 0.2), + theano.Param(learning_rate, default = 0.1)], + outputs = cost, + updates = updates, + givens = {self.x :train_set_x[batch_begin:batch_end]}) + # append `fn` to the list of functions + pretrain_fns.append(fn) + + return pretrain_fns + + + def build_finetune_functions(self, datasets, batch_size, learning_rate): + '''Generates a function `train` that implements one step of + finetuning, a function `validate` that computes the error on + a batch from the validation set, and a function `test` that + computes the error on a batch from the testing set + + :type datasets: list of pairs of theano.tensor.TensorType + :param datasets: It is a list that contain all the datasets; + the has to contain three pairs, `train`, + `valid`, `test` in this order, where each pair + is formed of two Theano variables, one for the + datapoints, the other for the labels + + :type batch_size: int + :param batch_size: size of a minibatch + + :type learning_rate: float + :param learning_rate: learning rate used during finetune stage + ''' + + (train_set_x, train_set_y) = datasets[0] + (valid_set_x, valid_set_y) = datasets[1] + (test_set_x , test_set_y ) = datasets[2] + + # compute number of minibatches for training, validation and testing + n_valid_batches = valid_set_x.value.shape[0] / batch_size + n_test_batches = test_set_x.value.shape[0] / batch_size + + index = T.lscalar('index') # index to a [mini]batch + + # compute the gradients with respect to the model parameters + gparams = T.grad(self.finetune_cost, self.params) + + # compute list of fine-tuning updates + updates = {} + for param, gparam in zip(self.params, gparams): + updates[param] = param - gparam*learning_rate + + train_fn = theano.function(inputs = [index], + outputs = self.finetune_cost, + updates = updates, + givens = { + self.x : train_set_x[index*batch_size:(index+1)*batch_size], + self.y : train_set_y[index*batch_size:(index+1)*batch_size]}) + + test_score_i = theano.function([index], self.errors, + givens = { + self.x: test_set_x[index*batch_size:(index+1)*batch_size], + self.y: test_set_y[index*batch_size:(index+1)*batch_size]}) + + valid_score_i = theano.function([index], self.errors, + givens = { + self.x: valid_set_x[index*batch_size:(index+1)*batch_size], + self.y: valid_set_y[index*batch_size:(index+1)*batch_size]}) + + # Create a function that scans the entire validation set + def valid_score(): + return [valid_score_i(i) for i in xrange(n_valid_batches)] + + # Create a function that scans the entire test set + def test_score(): + return [test_score_i(i) for i in xrange(n_test_batches)] + + return train_fn, valid_score, test_score + + + + + + +def test_SdA( finetune_lr = 0.4, pretraining_epochs = 25, \ + pretrain_lr = 0.4, training_epochs = 25, \ + dataset='../data/mnist.pkl.gz', batch_size = 200): + """ + Demonstrates how to train and test a stochastic denoising autoencoder. + + This is demonstrated on MNIST. + + :type learning_rate: float + :param learning_rate: learning rate used in the finetune stage + (factor for the stochastic gradient) + + :type pretraining_epochs: int + :param pretraining_epochs: number of epoch to do pretraining + + :type pretrain_lr: float + :param pretrain_lr: learning rate to be used during pre-training + + :type n_iter: int + :param n_iter: maximal number of iterations ot run the optimizer + + :type dataset: string + :param dataset: path the the pickled dataset + + """ + + datasets = load_data(dataset) + + train_set_x, train_set_y = datasets[0] + valid_set_x, valid_set_y = datasets[1] + test_set_x , test_set_y = datasets[2] + + + # compute number of minibatches for training, validation and testing + n_train_batches = train_set_x.value.shape[0] / batch_size + + # numpy random generator + numpy_rng = numpy.random.RandomState(89677) + print '... building the model' + # construct the stacked denoising autoencoder class + sda = SdA( numpy_rng = numpy_rng, n_ins = 28*28, + hidden_layers_sizes = [1000,1000,1000], + n_outs = 10) + + + ######################### + # PRETRAINING THE MODEL # + ######################### + print '... getting the pretraining functions' + pretraining_fns = sda.pretraining_functions( + train_set_x = train_set_x, + batch_size = batch_size ) + + print '... pre-training the model' + start_time = time.clock() + ## Pre-train layer-wise + corruption_levels = [.10,.10,.10] + for i in xrange(sda.n_layers): + # go through pretraining epochs + for epoch in xrange(pretraining_epochs): + # go through the training set + c = [] + for batch_index in xrange(n_train_batches): + c.append( pretraining_fns[i](index = batch_index, + corruption = corruption_levels[i], + lr = pretrain_lr ) ) + print 'Pre-training layer %i, epoch %d, cost '%(i,epoch),numpy.mean(c) + + end_time = time.clock() + + print >> sys.stderr, ('The pretraining code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((end_time-start_time)/60.)) + + ######################## + # FINETUNING THE MODEL # + ######################## + + # get the training, validation and testing function for the model + print '... getting the finetuning functions' + train_fn, validate_model, test_model = sda.build_finetune_functions ( + datasets = datasets, batch_size = batch_size, + learning_rate = finetune_lr) + + print '... finetunning the model' + # early-stopping parameters + patience = 10*n_train_batches # look as this many examples regardless + patience_increase = 2. # wait this much longer when a new best is + # found + improvement_threshold = 0.995 # a relative improvement of this much is + # considered significant + validation_frequency = min(n_train_batches, patience/2) + # go through this many + # minibatche before checking the network + # on the validation set; in this case we + # check every epoch + + + best_params = None + best_validation_loss = float('inf') + test_score = 0. + start_time = time.clock() + + done_looping = False + epoch = 0 + + while (epoch < training_epochs) and (not done_looping): + for minibatch_index in xrange(n_train_batches): + minibatch_avg_cost = train_fn(minibatch_index) + iter = epoch * n_train_batches + minibatch_index + + if (iter+1) % validation_frequency == 0: + validation_losses = validate_model() + this_validation_loss = numpy.mean(validation_losses) + print('epoch %i, minibatch %i/%i, validation error %f %%' % \ + (epoch, minibatch_index+1, n_train_batches, \ + this_validation_loss*100.)) + + + # if we got the best validation score until now + if this_validation_loss < best_validation_loss: + + #improve patience if loss improvement is good enough + if this_validation_loss < best_validation_loss * \ + improvement_threshold : + patience = max(patience, iter * patience_increase) + + # save best validation score and iteration number + best_validation_loss = this_validation_loss + best_iter = iter + + # test it on the test set + test_losses = test_model() + test_score = numpy.mean(test_losses) + print((' epoch %i, minibatch %i/%i, test error of best ' + 'model %f %%') % + (epoch, minibatch_index+1, n_train_batches, + test_score*100.)) + + + if patience <= iter : + done_looping = True + break + epoch = epoch + 1 + + end_time = time.clock() + print(('Optimization complete with best validation score of %f %%,' + 'with test performance %f %%') % + (best_validation_loss * 100., test_score*100.)) + print >> sys.stderr, ('The training code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((end_time-start_time)/60.)) + + + + + + +if __name__ == '__main__': + test_SdA() + + diff --git a/code/dA_plots/filters_corruption_0.png b/code/dA_plots/filters_corruption_0.png new file mode 100644 index 00000000..eaffcb63 Binary files /dev/null and b/code/dA_plots/filters_corruption_0.png differ diff --git a/code/dA_plots/filters_corruption_0_145860_25.png b/code/dA_plots/filters_corruption_0_145860_25.png new file mode 100644 index 00000000..eaede07e Binary files /dev/null and b/code/dA_plots/filters_corruption_0_145860_25.png differ diff --git a/code/dA_plots/filters_corruption_261900x100.png b/code/dA_plots/filters_corruption_261900x100.png new file mode 100644 index 00000000..05f5028b Binary files /dev/null and b/code/dA_plots/filters_corruption_261900x100.png differ diff --git a/code/dA_plots/filters_corruption_261900x500.png b/code/dA_plots/filters_corruption_261900x500.png new file mode 100644 index 00000000..b8bd6f5a Binary files /dev/null and b/code/dA_plots/filters_corruption_261900x500.png differ diff --git a/code/dA_plots/filters_corruption_30.png b/code/dA_plots/filters_corruption_30.png new file mode 100644 index 00000000..4efeab23 Binary files /dev/null and b/code/dA_plots/filters_corruption_30.png differ diff --git a/code/dA_plots/filters_corruption_30_145860_25.png b/code/dA_plots/filters_corruption_30_145860_25.png new file mode 100644 index 00000000..56484ca5 Binary files /dev/null and b/code/dA_plots/filters_corruption_30_145860_25.png differ diff --git a/code/dA_plots/log_dA_145860_100.txt b/code/dA_plots/log_dA_145860_100.txt new file mode 100644 index 00000000..efd98d1d --- /dev/null +++ b/code/dA_plots/log_dA_145860_100.txt @@ -0,0 +1,217 @@ +Using gpu device 0: GeForce GTX 470 +... loading data +frames = 150741 TRAIN = 145860 +input = 288 +[[ 0.4999986 0.50001061 0.50002503 ..., 0.50001675 0.4999994 + 0.50003493] + [ 0.50000131 0.49999517 0.5000155 ..., 0.50000322 0.5000034 0.500085 ] + [ 0.50001192 0.50001007 0.49999484 ..., 0.50000703 0.49997738 + 0.49995062] + ..., + [ 0.50036383 0.50014853 0.50039202 ..., 0.50093889 0.49868685 + 0.50046623] + [ 0.50023872 0.5000689 0.50005275 ..., 0.4986825 0.49972135 + 0.50064117] + [ 0.50001383 0.49999261 0.49988553 ..., 0.49990761 0.4990716 0.499295 ]] +Training epoch 0, cost 4.18604124889 +Training epoch 1, cost 2.63120056261 +Training epoch 2, cost 2.63757145122 +Training epoch 3, cost 2.64306500026 +Training epoch 4, cost 2.64316516051 +Training epoch 5, cost 2.63991771862 +Training epoch 6, cost 2.62174934441 +Training epoch 7, cost 2.59528159494 +Training epoch 8, cost 2.55827880759 +Training epoch 9, cost 2.53036221591 +Training epoch 10, cost 2.50064140057 +Training epoch 11, cost 2.47210054804 +Training epoch 12, cost 2.4462136745 +Training epoch 13, cost 2.42688381625 +Training epoch 14, cost 2.40377438126 +Training epoch 15, cost 2.38560681047 +Training epoch 16, cost 2.36972873175 +Training epoch 17, cost 2.35224353618 +Training epoch 18, cost 2.3381953907 +Training epoch 19, cost 2.32559930113 +Training epoch 20, cost 2.31283797391 +Training epoch 21, cost 2.30497753625 +Training epoch 22, cost 2.2933128835 +Training epoch 23, cost 2.28335845374 +Training epoch 24, cost 2.27583143725 +Training epoch 25, cost 2.26858936866 +Training epoch 26, cost 2.2620628835 +Training epoch 27, cost 2.25544882507 +Training epoch 28, cost 2.24985216989 +Training epoch 29, cost 2.24279462113 +Training epoch 30, cost 2.23837725923 +Training epoch 31, cost 2.23438450719 +Training epoch 32, cost 2.22929628582 +Training epoch 33, cost 2.22418556857 +Training epoch 34, cost 2.22108354651 +Training epoch 35, cost 2.21707713625 +Training epoch 36, cost 2.21313044722 +Training epoch 37, cost 2.20998544195 +Training epoch 38, cost 2.20594635909 +Training epoch 39, cost 2.20226011351 +Training epoch 40, cost 2.19935948319 +Training epoch 41, cost 2.19585360641 +Training epoch 42, cost 2.19397091496 +Training epoch 43, cost 2.19066093986 +Training epoch 44, cost 2.18775321263 +Training epoch 45, cost 2.18528214662 +Training epoch 46, cost 2.18255545772 +Training epoch 47, cost 2.17975271129 +Training epoch 48, cost 2.17756940517 +Training epoch 49, cost 2.17515029395 +Training epoch 50, cost 2.17346497712 +Training epoch 51, cost 2.17163852538 +Training epoch 52, cost 2.16883912656 +Training epoch 53, cost 2.16715702343 +Training epoch 54, cost 2.16592885515 +Training epoch 55, cost 2.16338802512 +Training epoch 56, cost 2.16145538744 +Training epoch 57, cost 2.15952435662 +Training epoch 58, cost 2.15784774356 +Training epoch 59, cost 2.15603829764 +Training epoch 60, cost 2.15435646232 +Training epoch 61, cost 2.15263258111 +Training epoch 62, cost 2.15083598999 +Training epoch 63, cost 2.15009710724 +Training epoch 64, cost 2.14758011214 +Training epoch 65, cost 2.14686091329 +Training epoch 66, cost 2.14493577424 +Training epoch 67, cost 2.1434399317 +Training epoch 68, cost 2.14217293124 +Training epoch 69, cost 2.14082746298 +Training epoch 70, cost 2.14039013223 +Training epoch 71, cost 2.13873454211 +Training epoch 72, cost 2.13668072124 +Training epoch 73, cost 2.13539363516 +Training epoch 74, cost 2.13416707374 +Training epoch 75, cost 2.1334285927 +Training epoch 76, cost 2.13237208946 +Training epoch 77, cost 2.13115155372 +Training epoch 78, cost 2.13028988092 +Training epoch 79, cost 2.12927421843 +Training epoch 80, cost 2.12782417108 +Training epoch 81, cost 2.12766683378 +Training epoch 82, cost 2.12608502481 +Training epoch 83, cost 2.12487319283 +Training epoch 84, cost 2.12353334854 +Training epoch 85, cost 2.12343814274 +Training epoch 86, cost 2.12165467422 +Training epoch 87, cost 2.12083879662 +Training epoch 88, cost 2.12023890631 +Training epoch 89, cost 2.11913366199 +Training epoch 90, cost 2.11807983683 +TrainiThe no corruption code for file dA.py ran for 24.39m +ng epoch 91, cost 2.11759590746 +Training epoch 92, cost 2.1165943049 +Training epoch 93, cost 2.11573370334 +Training epoch 94, cost 2.11396201837 +Training epoch 95, cost 2.11389439681 +Training epoch 96, cost 2.11330334417 +Training epoch 97, cost 2.11206620432 +Training epoch 98, cost 2.11148198079 +Training epoch 99, cost 2.11097475212 +Training epoch 0, cost 4.07315742621 +Training epoch 1, cost 2.63372867176 +Training epoch 2, cost 2.63366493341 +Training epoch 3, cost 2.63492845237 +Training epoch 4, cost 2.63488212155 +Training epoch 5, cost 2.63581650427 +Training epoch 6, cost 2.63493782565 +Training epoch 7, cost 2.63661135892 +Training epoch 8, cost 2.63508913727 +Training epoch 9, cost 2.64286842906 +Training epoch 10, cost 2.6315966509 +Training epoch 11, cost 2.63017191142 +Training epoch 12, cost 2.62661327643 +Training epoch 13, cost 2.61729274861 +Training epoch 14, cost 2.60820387486 +Training epoch 15, cost 2.58270934029 +Training epoch 16, cost 2.56930366662 +Training epoch 17, cost 2.56869226056 +Training epoch 18, cost 2.54020309501 +Training epoch 19, cost 2.5361950779 +Training epoch 20, cost 2.51913034117 +Training epoch 21, cost 2.51773961333 +Training epoch 22, cost 2.50552434698 +Training epoch 23, cost 2.4884221171 +Training epoch 24, cost 2.48178690182 +Training epoch 25, cost 2.47160644196 +Training epoch 26, cost 2.46567690122 +Training epoch 27, cost 2.45171150847 +Training epoch 28, cost 2.4494054123 +Training epoch 29, cost 2.43754392054 +Training epoch 30, cost 2.43117892371 +Training epoch 31, cost 2.42459244952 +Training epoch 32, cost 2.42241275881 +Training epoch 33, cost 2.41123283885 +Training epoch 34, cost 2.4087931064 +Training epoch 35, cost 2.40255183695 +Training epoch 36, cost 2.39680654823 +Training epoch 37, cost 2.39457945547 +Training epoch 38, cost 2.38670643082 +Training epoch 39, cost 2.38559850842 +Training epoch 40, cost 2.37964459713 +Training epoch 41, cost 2.37848150624 +Training epoch 42, cost 2.37343921397 +Training epoch 43, cost 2.36909215172 +Training epoch 44, cost 2.36754368487 +Training epoch 45, cost 2.36483909014 +Training epoch 46, cost 2.35920761989 +Training epoch 47, cost 2.35824672417 +Training epoch 48, cost 2.35610613345 +Training epoch 49, cost 2.35196448007 +Training epoch 50, cost 2.3485084156 +Training epoch 51, cost 2.34958795035 +Training epoch 52, cost 2.34640746049 +Training epoch 53, cost 2.34146131136 +Training epoch 54, cost 2.34219296329 +Training epoch 55, cost 2.33889945838 +Training epoch 56, cost 2.33597767123 +Training epoch 57, cost 2.33485046663 +Training epoch 58, cost 2.33332931621 +Training epoch 59, cost 2.33153473365 +Training epoch 60, cost 2.32852537322 +Training epoch 61, cost 2.32647182015 +Training epoch 62, cost 2.3264766407 +Training epoch 63, cost 2.32504975876 +Training epoch 64, cost 2.32362501928 +Training epoch 65, cost 2.31910093583 +Training epoch 66, cost 2.3193325899 +Training epoch 67, cost 2.31832188186 +Training epoch 68, cost 2.31731010258 +Training epoch 69, cost 2.31425896408 +Training epoch 70, cost 2.31354820119 +Training epoch 71, cost 2.3129555417 +Training epoch 72, cost 2.31029593874 +Training epoch 73, cost 2.31095340781 +Training epoch 74, cost 2.30849117947 +Training epoch 75, cost 2.3075484947 +Training epoch 76, cost 2.30678684826 +Training epoch 77, cost 2.30503109788 +Training epoch 78, cost 2.30244166067 +Training epoch 79, cost 2.30401851519 +Training epoch 80, cost 2.30116019856 +Training epoch 81, cost 2.30084793423 +Training epoch 82, cost 2.29852175245 +Training epoch 83, cost 2.29725287733 +Training epoch 84, cost 2.29584233169 +Training epoch 85, cost 2.29692454451 +Training epoch 86, cost 2.29505444005 +Training epoch 87, cost 2.29498963047 +Training epoch 88, cost 2.29420147102 +Training epoch 89, cost 2.29307078491 +Training epoch 90, cost 2.29191733512 +Training epoch 91, cost 2.29119800237 +Training epoch 92, cost 2.28983753685 +Training epoch 93, cost 2.29037208196 +Training epoch 94, cost 2.28776630845 +Training epoch 95, cost 2.28908740616 +Training epoch 96, cost 2.2The 30% corruption code for file dA.py ran for 23.59m +8792886801 +Training epoch 97, cost 2.2865686703 +Training epoch 98, cost 2.28574542798 +Training epoch 99, cost 2.28418035702 diff --git a/code/dAau.py b/code/dAau.py new file mode 100755 index 00000000..d9401551 --- /dev/null +++ b/code/dAau.py @@ -0,0 +1,355 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +""" + This tutorial introduces denoising auto-encoders (dA) using Theano. + + Denoising autoencoders are the building blocks for SdA. + They are based on auto-encoders as the ones used in Bengio et al. 2007. + An autoencoder takes an input x and first maps it to a hidden representation + y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting + latent representation y is then mapped back to a "reconstructed" vector + z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b'). The weight + matrix W' can optionally be constrained such that W' = W^T, in which case + the autoencoder is said to have tied weights. The network is trained such + that to minimize the reconstruction error (the error between x and z). + + For the denosing autoencoder, during training, first x is corrupted into + \tilde{x}, where \tilde{x} is a partially destroyed version of x by means + of a stochastic mapping. Afterwards y is computed as before (using + \tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction + error is now measured between z and the uncorrupted input x, which is + computed as the cross-entropy : + - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)] + + + References : + - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and + Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103, + 2008 + - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise + Training of Deep Networks, Advances in Neural Information Processing + Systems 19, 2007 + +""" + +import numpy, time, cPickle, gzip, sys, os + +import theano +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams + +from load_mp3 import * +from utils import tile_raster_images + +import PIL.Image + + +class dAau(object): + """Denoising Auto-Encoder class (dAau) + + A denoising autoencoders tries to reconstruct the input from a corrupted + version of it by projecting it first in a latent space and reprojecting + it afterwards back in the input space. Please refer to Vincent et al.,2008 + for more details. If x is the input then equation (1) computes a partially + destroyed version of x by means of a stochastic mapping q_D. Equation (2) + computes the projection of the input into the latent space. Equation (3) + computes the reconstruction of the input, while equation (4) computes the + reconstruction error. + + .. math:: + + \tilde{x} ~ q_D(\tilde{x}|x) (1) + + y = s(W \tilde{x} + b) (2) + + x = s(W' y + b') (3) + + L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)] (4) + + """ + + def __init__(self, numpy_rng, theano_rng = None, input = None, n_visible= 784, n_hidden= 500, + W = None, bhid = None, bvis = None): + """ + Initialize the dA class by specifying the number of visible units (the + dimension d of the input ), the number of hidden units ( the dimension + d' of the latent or hidden space ) and the corruption level. The + constructor also receives symbolic variables for the input, weights and + bias. Such a symbolic variables are useful when, for example the input is + the result of some computations, or when weights are shared between the + dA and an MLP layer. When dealing with SdAs this always happens, + the dA on layer 2 gets as input the output of the dA on layer 1, + and the weights of the dA are used in the second stage of training + to construct an MLP. + + :type numpy_rng: numpy.random.RandomState + :param numpy_rng: number random generator used to generate weights + + :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams + :param theano_rng: Theano random generator; if None is given one is generated + based on a seed drawn from `rng` + + :type input: theano.tensor.TensorType + :param input: a symbolic description of the input or None for standalone + dA + + :type n_visible: int + :param n_visible: number of visible units + + :type n_hidden: int + :param n_hidden: number of hidden units + + :type W: theano.tensor.TensorType + :param W: Theano variable pointing to a set of weights that should be + shared belong the dA and another architecture; if dA should + be standalone set this to None + + :type bhid: theano.tensor.TensorType + :param bhid: Theano variable pointing to a set of biases values (for + hidden units) that should be shared belong dA and another + architecture; if dA should be standalone set this to None + + :type bvis: theano.tensor.TensorType + :param bvis: Theano variable pointing to a set of biases values (for + visible units) that should be shared belong dA and another + architecture; if dA should be standalone set this to None + + + """ + self.n_visible = n_visible + self.n_hidden = n_hidden + + # create a Theano random generator that gives symbolic random values + if not theano_rng : + theano_rng = RandomStreams(rng.randint(2**30)) + + # note : W' was written as `W_prime` and b' as `b_prime` + if not W: + # W is initialized with `initial_W` which is uniformely sampled + # from -4*sqrt(6./(n_visible+n_hidden)) and + # 4*sqrt(6./(n_hidden+n_visible))the output of uniform if + # converted using asarray to dtype + # theano.config.floatX so that the code is runable on GPU + initial_W = numpy.asarray( numpy_rng.uniform( + low = -4*numpy.sqrt(6./(n_hidden+n_visible)), + high = 4*numpy.sqrt(6./(n_hidden+n_visible)), + size = (n_visible, n_hidden)), dtype = theano.config.floatX) + W = theano.shared(value = initial_W, name ='W') + + if not bvis: + bvis = theano.shared(value = numpy.zeros(n_visible, + dtype = theano.config.floatX)) + + if not bhid: + bhid = theano.shared(value = numpy.zeros(n_hidden, + dtype = theano.config.floatX), name ='b') + + + self.W = W + # b corresponds to the bias of the hidden + self.b = bhid + # b_prime corresponds to the bias of the visible + self.b_prime = bvis + # tied weights, therefore W_prime is W transpose + self.W_prime = self.W.T + self.theano_rng = theano_rng + # if no input is given, generate a variable representing the input + if input == None : + # we use a matrix because we expect a minibatch of several examples, + # each example being a row + self.x = T.dmatrix(name = 'input') + else: + self.x = input + + self.params = [self.W, self.b, self.b_prime] + + def get_corrupted_input(self, input, corruption_level): + """ This function keeps ``1-corruption_level`` entries of the inputs the same + and zero-out randomly selected subset of size ``coruption_level`` + Note : first argument of theano.rng.binomial is the shape(size) of + random numbers that it should produce + second argument is the number of trials + third argument is the probability of success of any trial + + this will produce an array of 0s and 1s where 1 has a probability of + 1 - ``corruption_level`` and 0 with ``corruption_level`` + + The binomial function return int64 data type by default. + int64 multiplicated by the input type(floatX) always return float64. + To keep all data in floatX when floatX is float32, we set the dtype + of the binomial to floatX. As in our case the value of the binomial + is always 0 or 1, this don't change the result. This is needed to allow + the gpu to work correctly as it only support float32 for now. + """ + corruption = self.theano_rng.binomial( size = input.shape, n = 1, p = 1 - corruption_level, dtype=theano.config.floatX) + return input * corruption + (1 - corruption) * 0.5 + + + def get_hidden_values(self, input): + """ Computes the values of the hidden layer """ + return T.nnet.sigmoid(T.dot(input, self.W) + self.b) + + def get_reconstructed_input(self, hidden ): + """ Computes the reconstructed input given the values of the hidden layer """ + return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime) + + def get_cost_updates(self, corruption_level, learning_rate): + """ This function computes the cost and the updates for one trainng + step of the dA """ + + tilde_x = self.get_corrupted_input(self.x, corruption_level) + y = self.get_hidden_values( tilde_x) + z = self.get_reconstructed_input(y) + # note : we sum over the size of a datapoint; if we are using minibatches, + # L will be a vector, with one entry per example in minibatch + + # L = - T.sum( self.x*T.log(z) + (1-self.x)*T.log(1-z), axis=1 ) + L = T.sum( T.abs_(z - self.x), axis=1 ) + + # note : L is now a vector, where each element is the cross-entropy cost + # of the reconstruction of the corresponding example of the + # minibatch. We need to compute the average of all these to get + # the cost of the minibatch + cost = T.mean(L) + + # compute the gradients of the cost of the `dA` with respect + # to its parameters + gparams = T.grad(cost, self.params) + # generate the list of updates + updates = {} + for param, gparam in zip(self.params, gparams): + updates[param] = param - learning_rate*gparam + + return (cost, updates) + + + + + +from load_mp3 import * + +def test_dA( learning_rate = 0.1, training_epochs = 15, dataset ='/home/dmitry/mp3/hhgttg01010060.mp3', + batch_size = 20, output_folder = 'dA_plots' ): + + """ + This demo is tested on /home/dmitry/mp3/hhgttg01010060.mp2 + + :type learning_rate: float + :param learning_rate: learning rate used for training the DeNosing AutoEncoder + + :type training_epochs: int + :param training_epochs: number of epochs used for training + + :type dataset: string + :param dataset: path to the picked dataset + + """ + datasets = load_data(dataset) + train_set_x, train_set_y = datasets[0] + + # compute number of minibatches for training, validation and testing + n_train_batches = train_set_x.value.shape[0] / batch_size + + # allocate symbolic variables for the data + index = T.lscalar() # index to a [mini]batch + x = T.matrix('x') # the data is presented as rasterized images + + + if not os.path.isdir(output_folder): + os.makedirs(output_folder) + os.chdir(output_folder) + #################################### + # BUILDING THE MODEL NO CORRUPTION # + #################################### + + rng = numpy.random.RandomState(123) + theano_rng = RandomStreams( rng.randint(2**30)) + + da = dA(numpy_rng = rng, theano_rng = theano_rng, input = x, + n_visible = S_DIM * F_DIM * X_DIM, n_hidden = 1000) + + cost, updates = da.get_cost_updates(corruption_level = 0., + learning_rate = learning_rate) + + + train_da = theano.function([index], cost, updates = updates, + givens = {x:train_set_x[index*batch_size:(index+1)*batch_size]}) + + start_time = time.clock() + + ############ + # TRAINING # + ############ + + # go through training epochs + for epoch in xrange(training_epochs): + # go through trainng set + c = [] + for batch_index in xrange(n_train_batches): + c.append(train_da(batch_index)) + + print 'Training epoch %d, cost '%epoch, numpy.mean(c) + + end_time = time.clock() + + training_time = (end_time - start_time) + + print >> sys.stderr, ('The no corruption code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((training_time)/60.)) + image = PIL.Image.fromarray(tile_raster_images( X = da.W.value.T, + img_shape = (S_DIM, F_DIM * X_DIM),tile_shape = (10,10), + tile_spacing=(1,1))) + image.save('filters_corruption_0.png') + + ##################################### + # BUILDING THE MODEL CORRUPTION 30% # + ##################################### + + rng = numpy.random.RandomState(123) + theano_rng = RandomStreams( rng.randint(2**30)) + + da = dAau(numpy_rng = rng, theano_rng = theano_rng, input = x, + n_visible = S_DIM * F_DIM * X_DIM, n_hidden = 1000) + + cost, updates = da.get_cost_updates(corruption_level = 0.3, + learning_rate = learning_rate) + + + train_da = theano.function([index], cost, updates = updates, + givens = {x:train_set_x[index*batch_size:(index+1)*batch_size]}) + + start_time = time.clock() + + ############ + # TRAINING # + ############ + + # go through training epochs + for epoch in xrange(training_epochs): + # go through trainng set + c = [] + for batch_index in xrange(n_train_batches): + c.append(train_da(batch_index)) + + print 'Training epoch %d, cost '%epoch, numpy.mean(c) + + end_time = time.clock() + + training_time = (end_time - start_time) + + print >> sys.stderr, ('The 30% corruption code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % (training_time/60.)) + + image = PIL.Image.fromarray(tile_raster_images( X = da.W.value.T, + img_shape = (S_DIM, F_DIM * X_DIM),tile_shape = (10,10), + tile_spacing=(1,1))) + image.save('filters_corruption_30.png') + + os.chdir('../') + + +if __name__ == "__main__": + d = "/home/dmitry/mp3/01- Hitchhikers Guide to the Galaxy" + dataset = sorted([os.path.join(d, f) for f in os.listdir(d)]) + #dataset = ["/home/dmitry/mp3/hhgttg01010060.mp3"] + test_dA(dataset = dataset[:5], training_epochs = 100) + diff --git a/code/dAext.py b/code/dAext.py new file mode 100644 index 00000000..26b3b228 --- /dev/null +++ b/code/dAext.py @@ -0,0 +1,375 @@ +""" + This tutorial introduces denoising auto-encoders (dA) using Theano. + + Denoising autoencoders are the building blocks for SdA. + They are based on auto-encoders as the ones used in Bengio et al. 2007. + An autoencoder takes an input x and first maps it to a hidden representation + y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting + latent representation y is then mapped back to a "reconstructed" vector + z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b'). The weight + matrix W' can optionally be constrained such that W' = W^T, in which case + the autoencoder is said to have tied weights. The network is trained such + that to minimize the reconstruction error (the error between x and z). + + For the denosing autoencoder, during training, first x is corrupted into + \tilde{x}, where \tilde{x} is a partially destroyed version of x by means + of a stochastic mapping. Afterwards y is computed as before (using + \tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction + error is now measured between z and the uncorrupted input x, which is + computed as the cross-entropy : + - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)] + + + References : + - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and + Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103, + 2008 + - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise + Training of Deep Networks, Advances in Neural Information Processing + Systems 19, 2007 + +""" + +import numpy, time, cPickle, gzip, sys, os + +import theano +import theano.tensor as T +from theano.tensor.shared_randomstreams import RandomStreams + +from logistic_sgd import load_data +from utils import tile_raster_images + +import PIL.Image + + +class dA(object): + """Denoising Auto-Encoder class (dA) + + A denoising autoencoders tries to reconstruct the input from a corrupted + version of it by projecting it first in a latent space and reprojecting + it afterwards back in the input space. Please refer to Vincent et al.,2008 + for more details. If x is the input then equation (1) computes a partially + destroyed version of x by means of a stochastic mapping q_D. Equation (2) + computes the projection of the input into the latent space. Equation (3) + computes the reconstruction of the input, while equation (4) computes the + reconstruction error. + + .. math:: + + \tilde{x} ~ q_D(\tilde{x}|x) (1) + + y = s(W \tilde{x} + b) (2) + + x = s(W' y + b') (3) + + L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)] (4) + + """ + + def __init__(self, numpy_rng, theano_rng = None, input = None, n_visible= 784, n_hidden= 500, + W = None, bhid = None, bvis = None): + """ + Initialize the dA class by specifying the number of visible units (the + dimension d of the input ), the number of hidden units ( the dimension + d' of the latent or hidden space ) and the corruption level. The + constructor also receives symbolic variables for the input, weights and + bias. Such a symbolic variables are useful when, for example the input is + the result of some computations, or when weights are shared between the + dA and an MLP layer. When dealing with SdAs this always happens, + the dA on layer 2 gets as input the output of the dA on layer 1, + and the weights of the dA are used in the second stage of training + to construct an MLP. + + :type numpy_rng: numpy.random.RandomState + :param numpy_rng: number random generator used to generate weights + + :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams + :param theano_rng: Theano random generator; if None is given one is generated + based on a seed drawn from `rng` + + :type input: theano.tensor.TensorType + :param input: a symbolic description of the input or None for standalone + dA + + :type n_visible: int + :param n_visible: number of visible units + + :type n_hidden: int + :param n_hidden: number of hidden units + + :type W: theano.tensor.TensorType + :param W: Theano variable pointing to a set of weights that should be + shared belong the dA and another architecture; if dA should + be standalone set this to None + + :type bhid: theano.tensor.TensorType + :param bhid: Theano variable pointing to a set of biases values (for + hidden units) that should be shared belong dA and another + architecture; if dA should be standalone set this to None + + :type bvis: theano.tensor.TensorType + :param bvis: Theano variable pointing to a set of biases values (for + visible units) that should be shared belong dA and another + architecture; if dA should be standalone set this to None + + + """ + self.n_visible = n_visible + self.n_hidden = n_hidden + + # create a Theano random generator that gives symbolic random values + if not theano_rng : + theano_rng = RandomStreams(rng.randint(2**30)) + + # note : W' was written as `W_prime` and b' as `b_prime` + if not W: + # W is initialized with `initial_W` which is uniformely sampled + # from -4*sqrt(6./(n_visible+n_hidden)) and + # 4*sqrt(6./(n_hidden+n_visible))the output of uniform if + # converted using asarray to dtype + # theano.config.floatX so that the code is runable on GPU + initial_W = numpy.asarray( numpy_rng.uniform( + low = -4*numpy.sqrt(6./(n_hidden+n_visible)), + high = 4*numpy.sqrt(6./(n_hidden+n_visible)), + size = (n_visible, n_hidden)), dtype = theano.config.floatX) + W = theano.shared(value = initial_W, name ='W') + + if not bvis: + bvis = theano.shared(value = numpy.zeros(n_visible, + dtype = theano.config.floatX)) + + if not bhid: + bhid = theano.shared(value = numpy.zeros(n_hidden, + dtype = theano.config.floatX), name ='b') + + + self.W = W + # b corresponds to the bias of the hidden + self.b = bhid + # b_prime corresponds to the bias of the visible + self.b_prime = bvis + # tied weights, therefore W_prime is W transpose + self.W_prime = self.W.T + self.theano_rng = theano_rng + # if no input is given, generate a variable representing the input + if input == None : + # we use a matrix because we expect a minibatch of several examples, + # each example being a row + self.x = T.dmatrix(name = 'input') + else: + self.x = input + + self.params = [self.W, self.b, self.b_prime] + + def get_corrupted_input(self, input, corruption_level): + """ This function keeps ``1-corruption_level`` entries of the inputs the same + and zero-out randomly selected subset of size ``coruption_level`` + Note : first argument of theano.rng.binomial is the shape(size) of + random numbers that it should produce + second argument is the number of trials + third argument is the probability of success of any trial + + this will produce an array of 0s and 1s where 1 has a probability of + 1 - ``corruption_level`` and 0 with ``corruption_level`` + + The binomial function return int64 data type by default. + int64 multiplicated by the input type(floatX) always return float64. + To keep all data in floatX when floatX is float32, we set the dtype + of the binomial to floatX. As in our case the value of the binomial + is always 0 or 1, this don't change the result. This is needed to allow + the gpu to work correctly as it only support float32 for now. + """ + + n = input.shape[0] + shuffled = T.permute_row_elements(input.T, self.theano_rng.permutation(n=n)).T + d = T.cast(self.n_visible * corruption_level, 'int32') + start = self.theano_rng.random_integers(low = 0, high = self.n_visible - d) + end = start + d + corrupted = T.set_subtensor(input[:, start:end], shuffled[:, start:end]) + return self.theano_rng.binomial( size = input.shape, n = 1, p = 1 - corruption_level, dtype=theano.config.floatX) * corrupted + + + def get_hidden_values(self, input): + """ Computes the values of the hidden layer """ + return T.nnet.sigmoid(T.dot(input, self.W) + self.b) + + def get_reconstructed_input(self, hidden ): + """ Computes the reconstructed input given the values of the hidden layer """ + return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime) + + def get_cost_updates(self, corruption_level, learning_rate): + """ This function computes the cost and the updates for one trainng + step of the dA """ + + tilde_x = self.get_corrupted_input(self.x, corruption_level) + y = self.get_hidden_values( tilde_x) + z = self.get_reconstructed_input(y) + # note : we sum over the size of a datapoint; if we are using minibatches, + # L will be a vector, with one entry per example in minibatch + L = - T.sum( self.x*T.log(z) + (1-self.x)*T.log(1-z), axis=1 ) + # note : L is now a vector, where each element is the cross-entropy cost + # of the reconstruction of the corresponding example of the + # minibatch. We need to compute the average of all these to get + # the cost of the minibatch + cost = T.mean(L) + + # compute the gradients of the cost of the `dA` with respect + # to its parameters + gparams = T.grad(cost, self.params) + # generate the list of updates + updates = {} + for param, gparam in zip(self.params, gparams): + updates[param] = param - learning_rate*gparam + + return (cost, updates) + + + + +def test_dA( learning_rate = 0.1, training_epochs = 15, dataset ='../data/mnist.pkl.gz', + batch_size = 200, output_folder = 'dA_plots' ): + + """ + This demo is tested on MNIST + + :type learning_rate: float + :param learning_rate: learning rate used for training the DeNosing AutoEncoder + + :type training_epochs: int + :param training_epochs: number of epochs used for training + + :type dataset: string + :param dataset: path to the picked dataset + + """ + datasets = load_data(dataset) + train_set_x, train_set_y = datasets[0] + + # compute number of minibatches for training, validation and testing + n_train_batches = train_set_x.value.shape[0] / batch_size + + # allocate symbolic variables for the data + index = T.lscalar() # index to a [mini]batch + x = T.matrix('x') # the data is presented as rasterized images + + + if not os.path.isdir(output_folder): + os.makedirs(output_folder) + os.chdir(output_folder) + + #################################### + # PLOT GOOD/CORRUPTED INPUT # + #################################### + rng = numpy.random.RandomState(123) + theano_rng = RandomStreams( rng.randint(2**30)) + da = dA(numpy_rng = rng, theano_rng = theano_rng, input = x, + n_visible = 28*28, n_hidden = 500) + + + #X = numpy.array(train_set_x.value[0:batch_size]) + #for x in X: + # d = 28*28//5 + # j = numpy.random.randint(28*28 - d) + # x[j:j+d] = X[numpy.random.randint(len(X))][j:j+d] + + + corrupt_input = theano.function([], + [da.get_corrupted_input(input = x, corruption_level = 0.3)], + givens = {x:train_set_x[0:batch_size]}) + X = corrupt_input()[0] + image = PIL.Image.fromarray(tile_raster_images( X = X, + img_shape = (28,28),tile_shape = (10,10), + tile_spacing=(1,1), scale_rows_to_unit_interval = False)) + image.save('input_corruption_30.png') + + + #################################### + # BUILDING THE MODEL NO CORRUPTION # + #################################### + + rng = numpy.random.RandomState(123) + theano_rng = RandomStreams( rng.randint(2**30)) + + da = dA(numpy_rng = rng, theano_rng = theano_rng, input = x, + n_visible = 28*28, n_hidden = 500) + + cost, updates = da.get_cost_updates(corruption_level = 0., + learning_rate = learning_rate) + + + train_da = theano.function([index], cost, updates = updates, + givens = {x:train_set_x[index*batch_size:(index+1)*batch_size]}) + + start_time = time.clock() + + ############ + # TRAINING # + ############ + + # go through training epochs + for epoch in xrange(training_epochs): + # go through trainng set + c = [] + for batch_index in xrange(n_train_batches): + c.append(train_da(batch_index)) + + print 'Training epoch %d, cost '%epoch, numpy.mean(c) + + end_time = time.clock() + + training_time = (end_time - start_time) + + print >> sys.stderr, ('The no corruption code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((training_time)/60.)) + image = PIL.Image.fromarray(tile_raster_images( X = da.W.value.T, + img_shape = (28,28),tile_shape = (10,10), + tile_spacing=(1,1))) + image.save('filters_corruption_0.png') + + ##################################### + # BUILDING THE MODEL CORRUPTION 30% # + ##################################### + + rng = numpy.random.RandomState(123) + theano_rng = RandomStreams( rng.randint(2**30)) + + da = dA(numpy_rng = rng, theano_rng = theano_rng, input = x, + n_visible = 28*28, n_hidden = 500) + + cost, updates = da.get_cost_updates(corruption_level = 0.3, + learning_rate = learning_rate) + + + train_da = theano.function([index], cost, updates = updates, + givens = {x:train_set_x[index*batch_size:(index+1)*batch_size]}) + + start_time = time.clock() + + ############ + # TRAINING # + ############ + + # go through training epochs + for epoch in xrange(training_epochs): + # go through trainng set + c = [] + for batch_index in xrange(n_train_batches): + c.append(train_da(batch_index)) + + print 'Training epoch %d, cost '%epoch, numpy.mean(c) + + end_time = time.clock() + + training_time = (end_time - start_time) + + print >> sys.stderr, ('The 30% corruption code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % (training_time/60.)) + + image = PIL.Image.fromarray(tile_raster_images( X = da.W.value.T, + img_shape = (28,28),tile_shape = (10,10), + tile_spacing=(1,1))) + image.save('filters_corruption_30.png') + + os.chdir('../') + + +if __name__ == '__main__': + test_dA() diff --git a/code/kNN.py b/code/kNN.py new file mode 100755 index 00000000..f35b71a9 --- /dev/null +++ b/code/kNN.py @@ -0,0 +1,139 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +""" + Naive kNN test/benchmarks. +""" + +from __future__ import division +import numpy, time, cPickle, gzip, sys, os, heapq +import theano +import theano.tensor as T +from operator import itemgetter +from utils import tile_raster_images +import PIL.Image + +# Needs: ../data/mnist.pkl.gz +from logistic_sgd import load_data + + + +def test_kNN( dataset ='../data/mnist.pkl.gz', + k = 9, + distance = T.sqr, + batch_size = 10000, + train_set_size = float('infinity'), + test_set_size = 1000, + img_shape = (28, 28), + output_folder = 'plots' ): + + """ + This demo is tested on '../data/mnist.pkl.gz' + + :type batch_size: int + :param batch_size: batch_size used for training + + :type dataset: string + :param dataset: path to the picked dataset + + """ + + from theano import ProfileMode + profmode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker()) + + + # load data + ((train_set_x, train_set_y), (valid_set_x, valid_set_y), + (test_set_x , test_set_y )) = load_data(dataset) + + # compute number of minibatches for training, validation and testing + train_set_size = min(train_set_x.value.shape[0], train_set_size) + test_set_size = min(test_set_x.value.shape[0], test_set_size) + n_train_batches = train_set_size / batch_size + + # + assert train_set_size > 0 + assert test_set_size > 0 + + # adjust img_shape + assert train_set_x.value[0].shape[0] == img_shape[0] * img_shape[1] + + + # allocate symbolic variables for the data + indexMB = T.lscalar() # index to a [mini]batch + indexT = T.lscalar() # index to a test set item + a = T.matrix('a') # the data is presented as vectors + b = T.vector('b') # the data is presented as vectors + + + if not os.path.isdir(output_folder): + os.makedirs(output_folder) + os.chdir(output_folder) + + ############################################# + # Exhaustive kNN queries for the test set # + ############################################# + + # define theano.function + DistancesF = T.sum( distance(a - b), axis=1 ) + calc_distances = theano.function([indexMB, indexT], DistancesF, + givens = {a:train_set_x[indexMB*batch_size:(indexMB+1)*batch_size], + b:test_set_x[indexT]}, + mode=profmode) + + # go through test set + start_time = time.clock() + kNN = [] + for test_index in xrange(test_set_size): + c = [] + for batch_index in xrange(n_train_batches): + batchDistances = calc_distances(batch_index, test_index) + batchKNN = [(batch_index * batch_size + i, batchDistances[i]) \ + for i in batchDistances.argsort()[:k] ] + c.extend(batchKNN) + + c.sort(key=itemgetter(1)) + kNN.append(c[:k]) + + # print out time/ETA + if(test_index and not test_index % 100): + test_time = time.clock() - start_time + 0.00000001; + ETA = ((test_time / test_index) * test_set_size - test_time); + print "%.1f seconds; test %d (%d); ETA %.1f seconds" % (test_time, test_index, test_set_size, ETA), + print "Gdists/sec = %f (3 flop/dist)" % (28*28*train_set_size*test_index / 1000000000 / test_time) + + end_time = time.clock() + + #for t in kNN: print t + training_time = (end_time - start_time) + profmode.print_summary() + print >> sys.stderr, ('The kNN search code for file '+os.path.split(__file__)[1]+' ran for %.2fm' % ((training_time)/60.)) + + + ### FIXME, why using test_set_x.value[i] directly eats memory? + ### the whole thing gets transferred from the GPU? + # + # X = [train_set_x.value[i] for i in xrange(train_set_x.value.shape[0])] + + ############################################# + # Plot Results # + ############################################# + tN = 100; X = [] + for test_index in xrange(test_set_size): + X.append(numpy.array(test_set_x.value[test_index])) + for i in kNN[test_index]: X.append(numpy.array(train_set_x.value[i[0]])) + if len(X) >= tN: break + + plot_file_name = os.path.split(dataset)[1] + "-kNN.png" + PIL.Image.fromarray( tile_raster_images( numpy.asarray(X), + img_shape = img_shape, tile_shape = (tN//(1+k),10), + tile_spacing=(1,1), scale_rows_to_unit_interval = False, + output_pixel_vals = True)).save(plot_file_name) + print >> sys.stderr, ("Produced plot %s/%s (1T%dNN)" % (output_folder, plot_file_name, k)) + + os.chdir('../') + + +if __name__ == "__main__": + test_kNN() + diff --git a/code/load_distances.py b/code/load_distances.py new file mode 100755 index 00000000..d14ee7f1 --- /dev/null +++ b/code/load_distances.py @@ -0,0 +1,68 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +from __future__ import division + +import mad, numpy, random, os, time, cPickle, gzip, sys +from theano.tensor.shared_randomstreams import RandomStreams +from utils import tile_raster_images +import matplotlib.pyplot +import theano +import theano.tensor as T + +# corpus size +N_DIM = 576 # 576 dims +(TRAIN, VALID, TEST) = (250000, 10, 10) + + +def load_data(dataset = ""): + ''' Loads the fake 'distances' dataset: + [[label/N, label/N, ...] : label, ...] for label in [0..N] + + :type dataset: string + :param dataset: the path to the dataset (ignored) + ''' + + ############# + # LOAD DATA # + ############# + print '... loading data' + + # make train/valid/test datasets + train_set = ([[i/TRAIN]*N_DIM for i in xrange(0, TRAIN)], + [i for i in xrange(0, TRAIN)] ) + valid_set = ([[i/VALID]*N_DIM for i in xrange(VALID)], + [i for i in xrange(VALID)] ) + test_set = ([[i/TEST]*N_DIM for i in xrange(TEST)], + [i for i in xrange(TEST)] ) + + + def shared_dataset(data_xy): + """ Function that loads the dataset into shared variables + + The reason we store our dataset in shared variables is to allow + Theano to copy it into the GPU memory (when code is run on GPU). + Since copying data into the GPU is slow, copying a minibatch everytime + is needed (the default behaviour if the data is not in a shared + variable) would lead to a large decrease in performance. + """ + data_x, data_y = data_xy + shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) + shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) + # When storing data on the GPU it has to be stored as floats + # therefore we will store the labels as ``floatX`` as well + # (``shared_y`` does exactly that). But during our computations + # we need them as ints (we use labels as index, and if they are + # floats it doesn't make sense) therefore instead of returning + # ``shared_y`` we will have to cast it to int. This little hack + # lets ous get around this issue + return shared_x, T.cast(shared_y, 'int32') + + test_set_x, test_set_y = shared_dataset(test_set) + valid_set_x, valid_set_y = shared_dataset(valid_set) + train_set_x, train_set_y = shared_dataset(train_set) + rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)] + return rval + + +if __name__ == "__main__": + load_data() diff --git a/code/load_mnist.py b/code/load_mnist.py new file mode 100755 index 00000000..cdee7217 --- /dev/null +++ b/code/load_mnist.py @@ -0,0 +1,92 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +from __future__ import division + +""" +Load mnist digits +""" +__docformat__ = 'restructedtext en' + +import numpy, time, cPickle, gzip, sys, os +from utils import tile_raster_images + +import theano +import theano.tensor as T + + +A_DIM = 28 +B_DIM = 28 +N_DIM = 28*28 + + + +def load_data(dataset = '../data/mnist.pkl.gz'): + ''' Loads the dataset + + :type dataset: string + :param dataset: the path to the dataset (here MNIST) + ''' + + ############# + # LOAD DATA # + ############# + print '... loading data' + + # Load the dataset + f = gzip.open(dataset,'rb') + train_set, valid_set, test_set = cPickle.load(f) + f.close() + + test_set = (test_set[0][:200], test_set[1][:200]) + print train_set[1][:10] + + def shared_dataset(data_xy): + """ Function that loads the dataset into shared variables + + The reason we store our dataset in shared variables is to allow + Theano to copy it into the GPU memory (when code is run on GPU). + Since copying data into the GPU is slow, copying a minibatch everytime + is needed (the default behaviour if the data is not in a shared + variable) would lead to a large decrease in performance. + """ + data_x, data_y = data_xy + shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) + shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) + # When storing data on the GPU it has to be stored as floats + # therefore we will store the labels as ``floatX`` as well + # (``shared_y`` does exactly that). But during our computations + # we need them as ints (we use labels as index, and if they are + # floats it doesn't make sense) therefore instead of returning + # ``shared_y`` we will have to cast it to int. This little hack + # lets ous get around this issue + return shared_x, T.cast(shared_y, 'int32') + + test_set_x, test_set_y = shared_dataset(test_set) + valid_set_x, valid_set_y = shared_dataset(valid_set) + train_set_x, train_set_y = shared_dataset(train_set) + + rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)] + return rval + + + +if __name__ == "__main__": + import PIL.Image + ((train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)) = \ + load_data() + + tN = 2000 + print "len(train_set_x.value.T)", len(train_set_x.value) + for i in xrange(1): # xrange(len(train_set_x.value)//tN): + X = [train_set_x.value[j] for j in xrange(i*tN, i*tN+tN)] + X = numpy.asarray(X) + arr = tile_raster_images( X, + img_shape = (A_DIM, B_DIM),tile_shape = (tN//10,10), + tile_spacing=(1,1), scale_rows_to_unit_interval = False, + output_pixel_vals = True) + #matplotlib.pyplot.imsave(fname = 'hhgttg010100%d.png' % i, arr = arr)#, vmin = 0.0, vmax = 1.0) + PIL.Image.fromarray(arr).save('mnist%d.png' % i) + + + + diff --git a/code/load_mp3.py b/code/load_mp3.py new file mode 100755 index 00000000..1a8f8a99 --- /dev/null +++ b/code/load_mp3.py @@ -0,0 +1,146 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +from __future__ import division + +import mad, numpy, random, os, time, cPickle, gzip, sys +from theano.tensor.shared_randomstreams import RandomStreams +from utils import tile_raster_images +import matplotlib.pyplot +import theano +import theano.tensor as T + +F_DIM = 1 # 2 frames +X_DIM = 36 # 36 samples +S_DIM = 8 # 8 subbands + + +# SAMPLES [MDCTed] +# S -------------------> +# U | +# B | amplitudes +# B | in colors +# A | blue ... red +# N | +# D V +# S +# + +def load_data(dataset): + ''' Loads the dataset + + :type dataset: string + :param dataset: the path to the dataset (ignored) + ''' + + ############# + # LOAD DATA # + ############# + print '... loading data' + + + frames = [] + def fCallback(mf): + frame = [] + channel = 0 + for sample in xrange(X_DIM): + for subband in reversed(xrange(S_DIM)): + frame.append((mf.subband_value(channel, sample, subband) + 1.0) / 2.0) + # fake it + #frame.append(subband / S_DIM * len(frames) / 2294) + frames.append(frame) + + # frequency domain filter + for f in dataset: + mf = mad.MadFile(f) + mf.set_filter_callback(fCallback) + while mf.read(): + pass + + # corpus size + (TRAIN, VALID, TEST) = (100, 1, 1) + SIZE = len(frames) // (TRAIN+VALID+TEST) + (TRAIN, VALID, TEST) = (TRAIN * SIZE, VALID * SIZE, TEST * SIZE) + print 'frames =', len(frames), 'TRAIN = ', TRAIN + + + + corpus = [] + for i in xrange(TRAIN + VALID + TEST): + x = frames[i * F_DIM : i * F_DIM + F_DIM] + # concatenate frames, set label + corpus.append((sum(x, []), i)) + + + # make train/valid/test datasets + train_set = ([corpus[i][0] for i in xrange(0, TRAIN)], + [corpus[i][1] for i in xrange(0, TRAIN)] ) + valid_set = ([corpus[i][0] for i in xrange(TRAIN, TRAIN + VALID)], + [corpus[i][1] for i in xrange(TRAIN, TRAIN + VALID)] ) + test_set = ([corpus[i][0] for i in xrange(TRAIN + VALID, TRAIN + VALID + TEST)], + [corpus[i][1] for i in xrange(TRAIN + VALID, TRAIN + VALID + TEST)] ) + + print 'input = ', len(train_set[0][0]) + #for i in xrange(F_DIM * X_DIM * S_DIM): + # print train_set[0][0][i], + # if not (i+1)%S_DIM: print + #print 'label =', train_set[1][0] + + def shared_dataset(data_xy): + """ Function that loads the dataset into shared variables + + The reason we store our dataset in shared variables is to allow + Theano to copy it into the GPU memory (when code is run on GPU). + Since copying data into the GPU is slow, copying a minibatch everytime + is needed (the default behaviour if the data is not in a shared + variable) would lead to a large decrease in performance. + """ + data_x, data_y = data_xy + shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) + shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) + # When storing data on the GPU it has to be stored as floats + # therefore we will store the labels as ``floatX`` as well + # (``shared_y`` does exactly that). But during our computations + # we need them as ints (we use labels as index, and if they are + # floats it doesn't make sense) therefore instead of returning + # ``shared_y`` we will have to cast it to int. This little hack + # lets ous get around this issue + return shared_x, T.cast(shared_y, 'int32') + + test_set_x, test_set_y = shared_dataset(test_set) + valid_set_x, valid_set_y = shared_dataset(valid_set) + train_set_x, train_set_y = shared_dataset(train_set) + print train_set_x.value + rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)] + return rval + + + +if __name__ == "__main__": + import PIL.Image + d = "/home/dmitry/mp3/01- Hitchhikers Guide to the Galaxy" + dataset = sorted([os.path.join(d, f) for f in os.listdir(d)]) + dataset = ["/home/dmitry/mp3/440-1k-2k.mp3"] + + # lame --preset cbr 48kbit -m mono + ((train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)) = \ + load_data(dataset[:1]) + + #from pca import pca + #V,S,immean = pca(train_set_x.value[0:200]) + #train_set_x.value = V + #print V + + #matplotlib.pyplot.imshow(V[0].reshape(S_DIM, F_DIM * X_DIM).T) + #matplotlib.pyplot.imshow(train_set_x.value[0].reshape(S_DIM, F_DIM * X_DIM), vmin = 0.0, vmax = 1.0) + #matplotlib.pyplot.show() + + tN = 499 + print "len(train_set_x.value.T)", len(train_set_x.value) + for i in xrange(len(train_set_x.value)//tN): + arr = tile_raster_images( X = train_set_x.value[i*tN:i*tN+tN], + img_shape = (S_DIM, F_DIM * X_DIM),tile_shape = (tN//10,10), + tile_spacing=(1,1), scale_rows_to_unit_interval = False, + output_pixel_vals = True) + #matplotlib.pyplot.imsave(fname = 'hhgttg010100%d.png' % i, arr = arr)#, vmin = 0.0, vmax = 1.0) + PIL.Image.fromarray(arr).save('hhgttg010100%d.png' % i) + diff --git a/code/load_names.py b/code/load_names.py new file mode 100755 index 00000000..fae35467 --- /dev/null +++ b/code/load_names.py @@ -0,0 +1,118 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +from __future__ import division + +import csv, string, numpy, random +import theano +import theano.tensor as T + +X_DIM = 28 +V_DIM = 28 + +# map letters to letters 'a':['a'], 'b':['b'] ... +# LVECTORS = {} +#for i in xrange(0,26): LVECTORS[chr(i + ord('a'))] = [chr(i + ord('a'))] +#LVECTORS[' '] = [' '] +#LVECTORS['-'] = ['-'] +#LVECTORS["'"] = ["'"] + +# map letters to binary vectors 'a':[1.0, 0.0, 0.0, ...], b:[0.0, 1.0, 0.0, ...] ... +LVECTORS = {} +for i in xrange(0,26): LVECTORS[chr(i + ord('a'))] = [(0.0, 1.0)[i == j] for j in xrange(V_DIM)] +LVECTORS[' '] = [(0.0, 1.0)[V_DIM == j] for j in xrange(V_DIM)] +LVECTORS['-'] = [(0.0, 1.0)[26 == j] for j in xrange(V_DIM)] +LVECTORS["'"] = [(0.0, 1.0)[27 == j] for j in xrange(V_DIM)] + + +def load_data(dataset): + ''' Loads the dataset + + :type dataset: string + :param dataset: the path to the dataset (ignored) + ''' + + ############# + # LOAD DATA # + ############# + print '... loading data' + + # Using real names corpus, requires NLTK (sudo apt-get install python-nltk) + # >>> import nltk + # >>> nltk.download("names") + # + # import nltk + # from nltk.corpus import names + # names = ([(name, 1) for name in names.words('male.txt')] + + # [(name, 0) for name in names.words('female.txt')]) + + + # Using fake male/female names, random letters, 5-15 in length + # female ends on 'a', 'e', 'i' + # male ends on 'k', 'o', 'r', 's', 't' + def fake(): + gender = random.choice((0, 1)) + name = ''.join(random.choice(string.letters) for i in xrange(random.randint(5,12))) + name += random.choice((['a', 'e', 'i'], ['k', 'o', 'r', 's', 't'])[gender]) + return name.capitalize(), gender + names = [fake() for i in xrange(10000)] + print "len(names)", len(names), names[:5] + + # make train/valid/test datasets + SHIFTS = 10 + TRAIN = 6000 * SHIFTS; VALID = 1200 * SHIFTS; TEST = 600 * SHIFTS; + corpus = [] + for i in xrange(TRAIN + VALID + TEST): + (word, label) = random.choice(names) + x = [LVECTORS[l] for l in list(word.lower())] + space = X_DIM - len(word); assert(space > 0); + shift = random.randint(0, space//2) + # produce shifted vectorized word (letter vectors are concatenated) + v = sum([LVECTORS[' ']] * shift + x + [LVECTORS[' ']] * (space - shift), []) + corpus.append((v, label)) + + random.shuffle(corpus) + + train_set = ([corpus[i][0] for i in xrange(0, TRAIN)], + [corpus[i][1] for i in xrange(0, TRAIN)] ) + valid_set = ([corpus[i][0] for i in xrange(TRAIN, TRAIN + VALID)], + [corpus[i][1] for i in xrange(TRAIN, TRAIN + VALID)] ) + test_set = ([corpus[i][0] for i in xrange(TRAIN + VALID, TRAIN + VALID + TEST)], + [corpus[i][1] for i in xrange(TRAIN + VALID, TRAIN + VALID + TEST)] ) + + print 'input =' + for i in xrange(V_DIM * X_DIM): + print train_set[0][0][i], + if not (i+1)%V_DIM: print + print 'label =', train_set[1][0] + + def shared_dataset(data_xy): + """ Function that loads the dataset into shared variables + + The reason we store our dataset in shared variables is to allow + Theano to copy it into the GPU memory (when code is run on GPU). + Since copying data into the GPU is slow, copying a minibatch everytime + is needed (the default behaviour if the data is not in a shared + variable) would lead to a large decrease in performance. + """ + data_x, data_y = data_xy + shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) + shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) + # When storing data on the GPU it has to be stored as floats + # therefore we will store the labels as ``floatX`` as well + # (``shared_y`` does exactly that). But during our computations + # we need them as ints (we use labels as index, and if they are + # floats it doesn't make sense) therefore instead of returning + # ``shared_y`` we will have to cast it to int. This little hack + # lets ous get around this issue + return shared_x, T.cast(shared_y, 'int32') + + test_set_x, test_set_y = shared_dataset(test_set) + valid_set_x, valid_set_y = shared_dataset(valid_set) + train_set_x, train_set_y = shared_dataset(train_set) + print train_set_x.value + rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)] + return rval + +if __name__ == "__main__": + load_data('') + diff --git a/code/plots/50Hz-1kHz-2kHz.png b/code/plots/50Hz-1kHz-2kHz.png new file mode 100644 index 00000000..cc9de6fb Binary files /dev/null and b/code/plots/50Hz-1kHz-2kHz.png differ diff --git a/code/plots/hhgttg0101007.png b/code/plots/hhgttg0101007.png new file mode 100644 index 00000000..dde7f158 Binary files /dev/null and b/code/plots/hhgttg0101007.png differ diff --git a/code/plots/mnist.pkl.gz-kNN.png b/code/plots/mnist.pkl.gz-kNN.png new file mode 100644 index 00000000..8ac09ae7 Binary files /dev/null and b/code/plots/mnist.pkl.gz-kNN.png differ diff --git a/code/plots/v06.png b/code/plots/v06.png new file mode 100644 index 00000000..e6e10d77 Binary files /dev/null and b/code/plots/v06.png differ diff --git a/code/rbm.py b/code/rbm.py index 689b3f74..4e055f19 100644 --- a/code/rbm.py +++ b/code/rbm.py @@ -15,11 +15,11 @@ from theano.tensor.shared_randomstreams import RandomStreams from utils import tile_raster_images -from logistic_sgd import load_data +from load_mp3 import * class RBM(object): """Restricted Boltzmann Machine (RBM) """ - def __init__(self, input=None, n_visible=784, n_hidden=500, \ + def __init__(self, input=None, n_visible=S_DIM * F_DIM * X_DIM, n_hidden=1000, \ W = None, hbias = None, vbias = None, numpy_rng = None, theano_rng = None): """ @@ -302,7 +302,7 @@ def get_reconstruction_cost(self, updates, pre_sigmoid_nv): def test_rbm(learning_rate=0.1, training_epochs = 15, dataset='../data/mnist.pkl.gz', batch_size = 20, n_chains = 20, n_samples = 10, output_folder = 'rbm_plots', - n_hidden = 500): + n_hidden = 1000): """ Demonstrate how to train and afterwards sample from it using Theano. @@ -341,7 +341,7 @@ def test_rbm(learning_rate=0.1, training_epochs = 15, persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),dtype=theano.config.floatX)) # construct the RBM class - rbm = RBM( input = x, n_visible=28*28, \ + rbm = RBM( input = x, n_visible=S_DIM * F_DIM * X_DIM, \ n_hidden = n_hidden, numpy_rng = rng, theano_rng = theano_rng) # get the cost and the gradient corresponding to one step of CD-15 @@ -380,7 +380,7 @@ def test_rbm(learning_rate=0.1, training_epochs = 15, plotting_start = time.clock() # Construct image from the weight matrix image = PIL.Image.fromarray(tile_raster_images( X = rbm.W.value.T, - img_shape = (28,28),tile_shape = (10,10), + img_shape = (S_DIM, F_DIM * X_DIM),tile_shape = (10,10), tile_spacing=(1,1))) image.save('filters_at_epoch_%i.png'%epoch) plotting_stop = time.clock() @@ -424,25 +424,24 @@ def test_rbm(learning_rate=0.1, training_epochs = 15, # samples for reinitializing the state of our persistent chain sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]], updates = updates) - - - # create a space to store the image for plotting ( we need to leave - # room for the tile_spacing as well) - image_data = numpy.zeros((29*n_samples+1,29*n_chains-1),dtype='uint8') for idx in xrange(n_samples): # generate `plot_every` intermediate samples that we discard, because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() - print ' ... plotting sample ', idx - image_data[29*idx:29*idx+28,:] = tile_raster_images( - X = vis_mf, - img_shape = (28,28), - tile_shape = (1, n_chains), - tile_spacing = (1,1)) + print ' ... plotting sample ', idx + image = PIL.Image.fromarray( + tile_raster_images( + X = vis_mf, + img_shape = (S_DIM, F_DIM * X_DIM), + tile_shape = (1, n_chains), + tile_spacing = (1,1))) + image.save('sample-%d.png' %idx) # construct image - image = PIL.Image.fromarray(image_data) - image.save('samples.png') os.chdir('../') if __name__ == '__main__': - test_rbm() + from dA import dA + d = "/home/dmitry/mp3/01- Hitchhikers Guide to the Galaxy" + dataset = sorted([os.path.join(d, f) for f in os.listdir(d)]) + #dataset = ["/home/dmitry/mp3/hhgttg0101006048k.mp3"] + test_rbm(dataset = dataset[:10], training_epochs = 15) diff --git a/code/shift_mp3.py b/code/shift_mp3.py new file mode 100644 index 00000000..90583b89 --- /dev/null +++ b/code/shift_mp3.py @@ -0,0 +1,182 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +from __future__ import division + +import mad, numpy, random, os, time, cPickle, gzip, sys +from theano.tensor.shared_randomstreams import RandomStreams +from utils import tile_raster_images +import matplotlib.pyplot +from collections import deque +from operator import itemgetter +import theano +import theano.tensor as T + +X_DIM = 36 # 36 samples +S_DIM = 8 # 8 subbands + + +# SAMPLES [MDCTed] +# S -------------------> +# U | +# B | amplitudes +# B | in colors +# A | blue ... red +# N | +# D V +# S +# + +def load_mp3(filename): + ''' Loads the mp3 file to numpy array + + :type filename: string + :param filename: the path to the mp3 file + ''' + + ############# + # LOAD DATA # + ############# + print '... loading data. filename = ', filename + mf = mad.MadFile(filename) + samples = numpy.zeros((mf.frames() * X_DIM, S_DIM), dtype=theano.config.floatX) + load_mp3.sample = 0; + + def fCallback(mf): + channel = 0 + for frame_sample in xrange(X_DIM): + for subband in reversed(xrange(S_DIM)): + samples[load_mp3.sample,subband] = mf.subband_value(channel, frame_sample, subband) + load_mp3.sample += 1 + + # frequency domain filter + mf.set_filter_callback(fCallback) + while mf.read(): + pass + + print "frames = ", load_mp3.sample / X_DIM, "mf.frames() = ", mf.frames() + return samples + + +def save_wave(samples, filename): + ''' Saves samples (numpy array) to the wave file. + Uses the template mp3 file to set sampling rate. + + :type filename: string + :param filename: the path to the input mp3 file + the path to the output .cdr file + ''' + + save_wave.sample = 0; + def fCallback(mf): + for frame_sample in xrange(X_DIM): + for subband in reversed(xrange(S_DIM)): + mf.subband_value(0, frame_sample, subband, samples[save_wave.sample, subband]) + mf.subband_value(1, frame_sample, subband, samples[save_wave.sample, subband]) + for subband in reversed(xrange(S_DIM,32)): + mf.subband_value(0, frame_sample, subband, 0.0) + mf.subband_value(1, frame_sample, subband, 0.0) + + save_wave.sample += 1 + + if len(samples) == save_wave.sample: + return False # to stop + + # frequency domain filter + mf = mad.MadFile(filename) + mf.set_filter_callback(fCallback) + + f = open(filename + ".cdr", 'wb') + while 1: + buffy = mf.read() + if buffy is None: + break + f.write(buffy) + + +def shift_kNN( train_set_files, + test_set_file, + k = 9, + distance = T.sqr, + test_set_size = X_DIM*200000, + output_folder = 'plots' ): + + ############################################# + # Exhaustive kNN queries for the test set # + ############################################# + + # allocate symbolic variables for the data + index = T.lscalar() # index to a test set item + a = T.matrix('a') # the data is presented as vectors + b = T.vector('b') # the data is presented as vectors + + # load test set + test_set_samples = load_mp3(test_set_file)[:test_set_size] + test_set_size = len(test_set_samples) + test_set_x = theano.shared(test_set_samples) + + + # go through train set + kNN = [[] for test_index in xrange(test_set_size)] + + for filename in train_set_files: + train_set_samples = load_mp3(filename) + train_set_x = theano.shared(train_set_samples) + + # frames (X_DIM samples) + frameQueue = deque(maxlen = X_DIM) + + # define theano.function + DistancesF = T.sum( distance(a - b), axis=1 ) + calc_distances = theano.function([index], DistancesF, + givens = {a:train_set_x[:], b:test_set_x[index]}) + + start_time = time.clock() + for test_index in xrange(test_set_size): + sampleDistances = calc_distances(test_index) + + # accumulate frame distances + for i, frameDistances in enumerate(frameQueue): + frameDistances[:-i-1] += sampleDistances[i+1:] + frameQueue.appendleft(sampleDistances) + + # find kNN frame + if len(frameQueue) == X_DIM: + frameDistances = frameQueue.pop() + #fileKNN = [(filename, i, frameDistances[i]) for i in frameDistances.argsort()[:k] ] + #kNN[test_index].extend(fileKNN) + i = frameDistances[:-X_DIM].argmin() + kNN[test_index - X_DIM].append((filename, i, frameDistances[i])) + + + # print out time/ETA + if(test_index and not test_index % 100): + test_time = time.clock() - start_time + 0.00000001; + ETA = ((test_time / test_index) * test_set_size - test_time); + print "%.1f seconds; test %d (%d); ETA %.1f seconds" % (test_time, test_index, test_set_size, ETA) + + # sort for nearest + for skNN in kNN: + skNN.sort(key=itemgetter(2)) + + + # construct and save o file + X = numpy.zeros((test_set_size, S_DIM), dtype=theano.config.floatX) + for filename in train_set_files: + train_set_samples = load_mp3(filename) + for test_index in xrange(test_set_size - X_DIM): + (f, i, d) = kNN[test_index][0] + if f != filename: continue + X[test_index] = train_set_samples[i] + + save_wave(X, filename = test_set_file) + + +if __name__ == "__main__": + import PIL.Image + d = "/home/dmitry/mp3/01- Hitchhikers Guide to the Galaxy" + train_set_files = sorted([os.path.join(d, f) for f in os.listdir(d) if f.endswith(".mp3")]) + test_set_file = "/home/dmitry/mp3/hhgttg01010060.mp3" + test_set_file = "/home/dmitry/mp3/advice_to_little_girls_twain_alnl.mp3" + test_set_file = "mp3/test.mp3" + + shift_kNN(train_set_files[1:2], test_set_file) diff --git a/code/speak_mp3.sh b/code/speak_mp3.sh new file mode 100644 index 00000000..b0f92278 --- /dev/null +++ b/code/speak_mp3.sh @@ -0,0 +1,4 @@ +espeak -w mp3/test.wav "This is a test." +sox mp3/test.wav -c 2 -r 44100 mp3/test-441s.wav +lame -ms -cbr mp3/test-441s.wav mp3/test.mp3 + diff --git a/code/utils.py b/code/utils.py index 781b3d48..f6a7270a 100644 --- a/code/utils.py +++ b/code/utils.py @@ -19,7 +19,8 @@ def scale_to_unit_interval(ndar,eps=1e-8): def tile_raster_images(X, img_shape, tile_shape,tile_spacing = (0,0), - scale_rows_to_unit_interval = True, output_pixel_vals = True): + scale_rows_to_unit_interval = True, output_pixel_vals = True, + transpose = False): """ Transform an array with one flattened image per row, into an array in which images are reshaped and layed out like tiles on a floor. @@ -55,6 +56,9 @@ def tile_raster_images(X, img_shape, tile_shape,tile_spacing = (0,0), assert len(tile_shape) == 2 assert len(tile_spacing) == 2 + if transpose: + img_shape = tuple(reversed(img_shape)) + # The expression below can be re-written in a more C style as # follows : # @@ -116,7 +120,10 @@ def tile_raster_images(X, img_shape, tile_shape,tile_spacing = (0,0), # function this_img = scale_to_unit_interval(X[tile_row * tile_shape[1] + tile_col].reshape(img_shape)) else: - this_img = X[tile_row * tile_shape[1] + tile_col].reshape(img_shape) + this_img = X[tile_row * tile_shape[1] + tile_col].reshape( img_shape ) + + if transpose: + this_img = this_img.T # add the slice to the corresponding position in the # output array c = 1