added dropout #145
base: master (changes from 1 commit)
@@ -10,208 +10,9 @@
import theano.tensor.shared_randomstreams
import gzip
from collections import OrderedDict
from logistic_sgd import LogisticRegression, load_data

class LogisticRegression(object):
    """Multi-class Logistic Regression Class

    The logistic regression is fully described by a weight matrix :math:`W`
    and bias vector :math:`b`. Classification is done by projecting data
    points onto a set of hyperplanes, the distance to which is used to
    determine a class membership probability.
    """

    def __init__(self, input, n_in, n_out, W=None, b=None):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie
        """
        # start-snippet-1
        # initialize the weights W with 0s, as a matrix of shape
        # (n_in, n_out), unless an existing W is passed in
        if W is None:
            self.W = theano.shared(
                value=np.zeros((n_in, n_out), dtype=theano.config.floatX),
                name='W')
        else:
            self.W = W

        # initialize the biases b as a vector of n_out 0s, unless an
        # existing b is passed in
        if b is None:
            self.b = theano.shared(
                value=np.zeros((n_out,), dtype=theano.config.floatX),
                name='b')
        else:
            self.b = b

        # symbolic expression for computing the matrix of class-membership
        # probabilities, where:
        # W is a matrix whose column-k represents the separation hyperplane
        # for class-k
        # x is a matrix whose row-j represents input training sample-j
        # b is a vector whose element-k represents the free parameter of
        # hyperplane-k
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # symbolic description of how to compute the prediction as the
        # class whose probability is maximal
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        # end-snippet-1

        # parameters of the model
        self.params = [self.W, self.b]

        # keep track of model input
        self.input = input

    def negative_log_likelihood(self, y):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \ell(\theta=\{W,b\}, \mathcal{D}) =
                \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
                \log(P(Y=y^{(i)} | x^{(i)}, W, b))

        (the returned cost is :math:`-\ell`, hence *negative*
        log-likelihood)

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that the learning rate
        is less dependent on the batch size.
        """
        # start-snippet-2
        # y.shape[0] is (symbolically) the number of rows in y, i.e., the
        # number of examples (call it n) in the minibatch.
        # T.arange(y.shape[0]) is a symbolic vector which will contain
        # [0, 1, 2, ..., n-1].
        # T.log(self.p_y_given_x) is a matrix of log-probabilities (call it
        # LP) with one row per example and one column per class.
        # LP[T.arange(y.shape[0]), y] is a vector v containing
        # [LP[0, y[0]], LP[1, y[1]], ..., LP[n-1, y[n-1]]], and
        # T.mean(LP[T.arange(y.shape[0]), y]) is the mean (across minibatch
        # examples) of the elements in v, i.e., the mean log-likelihood
        # across the minibatch.
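        # For intuition, a concrete (hypothetical) example: with n = 3 and
        # y = [2, 0, 1], LP[T.arange(3), y] selects
        # [LP[0, 2], LP[1, 0], LP[2, 1]].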
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
        # end-snippet-2

    def errors(self, y):
        """Return a float representing the number of errors in the
        minibatch over the total number of examples of the minibatch;
        i.e., the zero-one loss over the size of the minibatch.

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """
        # check if y has the same dimension as y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()


def load_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    #############
    # LOAD DATA #
    #############

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('... loading data')

    # Load the dataset
    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            # Python 2's pickle.load does not accept the encoding keyword
            train_set, valid_set, test_set = pickle.load(f)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix) where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (a vector) that has the same length as the number of rows in the
    # input. It gives the target for the example with the same index in
    # the input.

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into GPU memory (when the code is run on a GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time one is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(np.asarray(data_x,
                                            dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(np.asarray(data_y,
                                            dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats,
        # therefore we store the labels as ``floatX`` as well (``shared_y``
        # does exactly that). But during our computations we need them as
        # ints (we use the labels as indices, which makes no sense for
        # floats), so instead of returning ``shared_y`` we cast it to int.
        # This little hack lets us get around the issue.
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
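For reference, a usage sketch (not part of the diff) showing how callers unpack the return value; test_mlp below calls load_data(dataset) the same way, and the filename here is the tutorial's default:

    datasets = load_data('mnist.pkl.gz')
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]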

class HiddenLayer(object):
    def __init__(self, rng, input, n_in, n_out, W=None, b=None,

@@ -292,10 +93,33 @@ def _dropsout(rng, layer, p):
    output = layer * T.cast(mask, theano.config.floatX)
    return output
Member: It may be better to scale up the output right away.
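A minimal sketch of that suggestion ("inverted" dropout): scale the retained units by 1/p at training time, so the test-time network needs no rescaling of its weights. This mirrors the _dropsout helper above but is an illustration, not the PR's code; it assumes p is the probability of retaining a unit:

    import theano
    import theano.tensor as T
    from theano.tensor.shared_randomstreams import RandomStreams

    def _dropsout_scaled(rng, layer, p):
        # hypothetical variant of _dropsout: same Bernoulli mask, but scaled
        srng = RandomStreams(rng.randint(999999))
        mask = srng.binomial(n=1, p=p, size=layer.shape)
        # dividing by p keeps the expected activation equal to the
        # no-dropout case, so no scaling is needed at test time
        return layer * T.cast(mask, theano.config.floatX) / p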

class HiddenDropoutLayer(HiddenLayer):
    def __init__(self, rng, input, n_in, n_out,
                 activation, dropout_rate, W=None, b=None):
        """
        Dropout layer of an MLP. Weight matrix W is of shape (n_in, n_out)
        and the bias vector b is of shape (n_out,).

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dmatrix
        :param input: a symbolic tensor of shape (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: theano.Op or function
        :param activation: non-linearity to be applied in the hidden layer

        :type dropout_rate: float
        :param dropout_rate: probability of retaining a unit in this layer
        """
        super(HiddenDropoutLayer, self).__init__(
            rng=rng, input=input, n_in=n_in, n_out=n_out, W=W, b=b,
            activation=activation)
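The remainder of the constructor falls outside this hunk. Presumably it masks the activations computed by the superclass with _dropsout; a sketch of that step (an assumption, not the PR's exact code):

        # hypothetical continuation: drop units from the HiddenLayer output
        self.output = _dropsout(rng, self.output, dropout_rate)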

@@ -331,6 +155,9 @@ def __init__(self, rng, input, n_in, n_hidden, dropout_rates, n_out):
        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type dropout_rates: list
        :param dropout_rates: array containing probabilities of retaining a unit

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

@@ -416,21 +243,15 @@ def test_mlp(learning_rate=0.01, n_epochs=1000, dropout_rates = [0.2, 0.5],
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
                   regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
                   regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                    http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type dropout_rates: list
    :param dropout_rates: array containing probabilities of retaining a unit
    """
    datasets = load_data(dataset)

@@ -516,7 +337,7 @@ def test_mlp(learning_rate=0.01, n_epochs=1000, dropout_rates = [0.2, 0.5],
    # element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    # Stochastic Gradient Descent (SGD) updates
    output = dropout_cost
    updates = OrderedDict()
    for param, gparam in zip(classifier.params, gparams):
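The loop body is cut off by the hunk boundary; the standard Theano SGD update it would perform looks like this sketch (an assumption, not necessarily the PR's exact code):

        # vanilla SGD step: move each parameter against its gradient
        updates[param] = param - learning_rate * gparam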

(second file in the diff)

@@ -58,8 +58,10 @@ class LogisticRegression(object):
     determine a class membership probability.
     """

-    def __init__(self, input, n_in, n_out):
+    def __init__(self, input, n_in, n_out, W=None, b=None):
         """ Initialize the parameters of the logistic regression
+        Weight matrix W is of shape (n_in, n_out)
+        and the bias vector b is of shape (n_out,).

         :type input: theano.tensor.TensorType
         :param input: symbolic variable that describes the input of the
@@ -75,24 +77,23 @@ def __init__(self, input, n_in, n_out):

         """
         # start-snippet-1
-        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
-        self.W = theano.shared(
-            value=numpy.zeros(
-                (n_in, n_out),
-                dtype=theano.config.floatX
-            ),
-            name='W',
-            borrow=True
-        )
-        # initialize the biases b as a vector of n_out 0s
-        self.b = theano.shared(
-            value=numpy.zeros(
-                (n_out,),
-                dtype=theano.config.floatX
-            ),
-            name='b',
-            borrow=True
-        )
+        # initialize with 0 the weights W as a matrix of shape (n_in, n_out) if
+        # the parameter W is None
+        if W is None:
+            self.W = theano.shared(
+                value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX),
+                name='W')
+        else:
+            self.W = W
+
+        # initialize the baises b as a vector of n_out 0s if the parameter b is
Member: "biases"
+        #not none
Member: "is None"
+        if b is None:
+            self.b = theano.shared(
+                value=numpy.zeros((n_out,), dtype=theano.config.floatX),
+                name='b')
+        else:
+            self.b = b

         # symbolic expression for computing the matrix of class-membership
         # probabilities
Comment: I think you can just import it from mlp.py, to avoid duplication.

Reply: Yeah, I had wanted to do it in the previous commit; it somehow slipped my mind. Will do.
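For context, the point of the new W/b constructor arguments in both this class and the layers above is to let the dropout network and its deterministic test-time counterpart share one set of parameters. A sketch of the pattern, assuming the modified LogisticRegression is in scope (variable names and sizes are illustrative, not from the PR):

    import theano.tensor as T

    x = T.matrix('x')

    # the dropout path allocates the shared W and b ...
    drop_clf = LogisticRegression(input=x, n_in=784, n_out=10)

    # ... and the test-time path reuses them via the new arguments, so both
    # graphs are built over the same underlying parameters (in the PR's MLP
    # the two paths would receive differently masked inputs)
    test_clf = LogisticRegression(input=x, n_in=784, n_out=10,
                                  W=drop_clf.W, b=drop_clf.b)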