Added docstrings
Sentient07 committed Mar 8, 2016
commit ff3f08589fac489e8feb8175c6daedc1913edb2e
241 changes: 31 additions & 210 deletions code/dropout.py
@@ -10,208 +10,9 @@
import theano.tensor.shared_randomstreams
import gzip
from collections import OrderedDict
from logistic_sgd import LogisticRegression, load_data


class LogisticRegression(object):
"""Multi-class Logistic Regression Class

The logistic regression is fully described by a weight matrix :math:`W`
and bias vector :math:`b`. Classification is done by projecting data
points onto a set of hyperplanes, the distance to which is used to
determine a class membership probability.
"""

def __init__(self, input, n_in, n_out, W=None, b=None):
""" Initialize the parameters of the logistic regression

:type input: theano.tensor.TensorType
:param input: symbolic variable that describes the input of the
architecture (one minibatch)

:type n_in: int
:param n_in: number of input units, the dimension of the space in
which the datapoints lie

:type n_out: int
:param n_out: number of output units, the dimension of the space in
which the labels lie

"""
# start-snippet-1
# initialize with 0 the weights W as a matrix of shape (n_in, n_out)
if W is None:
self.W = theano.shared(
value=np.zeros((n_in, n_out), dtype=theano.config.floatX),
name='W')
else:
self.W = W

# initialize the biases b as a vector of n_out 0s
if b is None:
self.b = theano.shared(
value=np.zeros((n_out,), dtype=theano.config.floatX),
name='b')
else:
self.b = b

# symbolic expression for computing the matrix of class-membership
# probabilities
# Where:
# W is a matrix where column-k represents the separation hyperplane for
# class-k
# x is a matrix where row-j represents input training sample-j
# b is a vector where element-k represents the free parameter of
# hyperplane-k
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

# symbolic description of how to compute prediction as class whose
# probability is maximal
self.y_pred = T.argmax(self.p_y_given_x, axis=1)
# end-snippet-1

# parameters of the model
self.params = [self.W, self.b]

# keep track of model input
self.input = input

def negative_log_likelihood(self, y):
"""Return the mean of the negative log-likelihood of the prediction
of this model under a given target distribution.

.. math::

\ell(\theta=\{W,b\}, \mathcal{D}) = -\frac{1}{|\mathcal{D}|}
\sum_{i=0}^{|\mathcal{D}|-1} \log(P(Y=y^{(i)}|x^{(i)}, W, b))

:type y: theano.tensor.TensorType
:param y: corresponds to a vector that gives for each example the
correct label

Note: we use the mean instead of the sum so that
the learning rate is less dependent on the batch size
"""
# start-snippet-2
# y.shape[0] is (symbolically) the number of rows in y, i.e.,
# number of examples (call it n) in the minibatch
# T.arange(y.shape[0]) is a symbolic vector which will contain
# [0, 1, 2, ..., n-1]. T.log(self.p_y_given_x) is a matrix of
# log-probabilities (call it LP) with one row per example and
# one column per class. LP[T.arange(y.shape[0]), y] is a vector
# v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
# LP[n-1,y[n-1]]], and T.mean(LP[T.arange(y.shape[0]), y]) is
# the mean (across minibatch examples) of the elements in v,
# i.e., the mean log-likelihood across the minibatch.
return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
# end-snippet-2
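# To make the indexing trick above concrete, a standalone NumPy sketch
# (not part of this diff; the values are made up for illustration):
#
#     import numpy as np
#     LP = np.log(np.array([[0.7, 0.2, 0.1],   # one row per example,
#                           [0.1, 0.8, 0.1],   # one column per class
#                           [0.3, 0.3, 0.4]]))
#     y = np.array([0, 1, 2])            # correct label of each example
#     v = LP[np.arange(y.shape[0]), y]   # [LP[0,0], LP[1,1], LP[2,2]]
#     print(-v.mean())                   # mean negative log-likelihood, ~0.50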

def errors(self, y):
"""Return a float representing the number of errors in the minibatch
over the total number of examples of the minibatch ; zero one
loss over the size of the minibatch

:type y: theano.tensor.TensorType
:param y: corresponds to a vector that gives for each example the
correct label
"""

# check if y has the same dimension as y_pred
if y.ndim != self.y_pred.ndim:
raise TypeError(
'y should have the same shape as self.y_pred',
('y', y.type, 'y_pred', self.y_pred.type)
)
# check if y is of the correct datatype
if y.dtype.startswith('int'):
# the T.neq operator returns a vector of 0s and 1s, where 1
# represents a mistake in prediction
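# e.g. y_pred = [0, 1, 2] against y = [0, 2, 2] gives [0, 1, 0],
# whose mean (1/3) is the error rate on the minibatch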
return T.mean(T.neq(self.y_pred, y))
else:
raise NotImplementedError()


def load_data(dataset):
''' Loads the dataset

:type dataset: string
:param dataset: the path to the dataset (here MNIST)
'''

#############
# LOAD DATA #
#############

# Download the MNIST dataset if it is not present
data_dir, data_file = os.path.split(dataset)
if data_dir == "" and not os.path.isfile(dataset):
# Check if dataset is in the data directory.
new_path = os.path.join(
os.path.split(__file__)[0],
"..",
"data",
dataset
)
if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
dataset = new_path

if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
from six.moves import urllib
origin = (
'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
)
print('Downloading data from %s' % origin)
urllib.request.urlretrieve(origin, dataset)

print('... loading data')

# Load the dataset
with gzip.open(dataset, 'rb') as f:
try:
train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
except:
train_set, valid_set, test_set = pickle.load(f)
# train_set, valid_set, test_set format: tuple(input, target)
# input is a numpy.ndarray of 2 dimensions (a matrix)
# where each row corresponds to an example. target is a
# numpy.ndarray of 1 dimension (vector) that has the same length as
# the number of rows in the input. It should give the target
# to the example with the same index in the input.

def shared_dataset(data_xy, borrow=True):
""" Function that loads the dataset into shared variables

The reason we store our dataset in shared variables is to allow
Theano to copy it into the GPU memory (when code is run on GPU).
Since copying data into the GPU is slow, copying a minibatch every
time it is needed (the default behaviour if the data is not in a
shared variable) would lead to a large decrease in performance.
"""
data_x, data_y = data_xy
shared_x = theano.shared(np.asarray(data_x,
dtype=theano.config.floatX),
borrow=borrow)
shared_y = theano.shared(np.asarray(data_y,
dtype=theano.config.floatX),
borrow=borrow)
# When storing data on the GPU it has to be stored as floats
# therefore we will store the labels as ``floatX`` as well
# (``shared_y`` does exactly that). But during our computations
# we need them as ints (we use labels as index, and if they are
# floats it doesn't make sense) therefore instead of returning
# ``shared_y`` we will have to cast it to int. This little hack
# lets us get around this issue.
return shared_x, T.cast(shared_y, 'int32')

test_set_x, test_set_y = shared_dataset(test_set)
valid_set_x, valid_set_y = shared_dataset(valid_set)
train_set_x, train_set_y = shared_dataset(train_set)

rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
(test_set_x, test_set_y)]
return rval
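# Usage sketch (not part of this diff): the tutorial code consumes the
# return value as
#
#     datasets = load_data('mnist.pkl.gz')
#     train_set_x, train_set_y = datasets[0]
#     valid_set_x, valid_set_y = datasets[1]
#     test_set_x, test_set_y = datasets[2]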


class HiddenLayer(object):
Member: I think you can just import it from mlp.py, to avoid duplication.

Author: Yeah, I had wanted to do it in the previous commit; it somehow slipped my mind. Will do.

def __init__(self, rng, input, n_in, n_out, W=None, b=None,
@@ -292,10 +93,33 @@ def _dropsout(rng, layer, p):
output = layer*T.cast(mask, theano.config.floatX)
return output
Member: It may be better to scale up the output right away.
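"Scaling up right away" refers to inverted dropout: dividing the surviving activations by the retain probability at training time, so that no rescaling is needed at test time. A minimal sketch of such a variant, assuming the same RandomStreams setup as _dropsout; this is an illustration, not the committed code:

def _dropout_scaled(rng, layer, p):
    # entries of the mask are 1 with probability (1 - p), 0 with
    # probability p; surviving units are scaled up by 1 / (1 - p)
    srng = theano.tensor.shared_randomstreams.RandomStreams(
        rng.randint(999999))
    mask = srng.binomial(n=1, p=1 - p, size=layer.shape)
    return layer * T.cast(mask, theano.config.floatX) / (1 - p)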


class HiddenDropoutLayer(HiddenLayer):
def __init__(self, rng, input, n_in, n_out,
activation, dropout_rate, W=None, b=None):

"""
Dropout layer of an MLP. Weight matrix W is of shape (n_in, n_out)
and the bias vector b is of shape (n_out,).

:type rng: numpy.random.RandomState
:param rng: a random number generator used to initialize weights

:type input: theano.tensor.dmatrix
:param input: a symbolic tensor of shape (n_examples, n_in)

:type n_in: int
:param n_in: dimensionality of input

:type n_out: int
:param n_out: number of hidden units

:type activation: theano.Op or function
:param activation: non-linearity to be applied in the hidden
layer

:type dropout_rate: float
:param dropout_rate: probability of retaining a unit in this layer
"""
super(HiddenDropoutLayer, self).__init__(
rng=rng, input=input, n_in=n_in, n_out=n_out, W=W, b=b,
activation=activation)
@@ -331,6 +155,9 @@ def __init__(self, rng, input, n_in, n_hidden, dropout_rates, n_out):
:type n_hidden: int
:param n_hidden: number of hidden units

:type dropout_rates: list
:param dropout_rates: list of the probabilities of retaining a unit in
each layer

:type n_out: int
:param n_out: number of output units, the dimension of the space in
which the labels lie
@@ -416,21 +243,15 @@ def test_mlp(learning_rate=0.01, n_epochs=1000, dropout_rates = [0.2, 0.5],
:param learning_rate: learning rate used (factor for the stochastic
gradient)

:type L1_reg: float
:param L1_reg: L1-norm's weight when added to the cost (see
regularization)

:type L2_reg: float
:param L2_reg: L2-norm's weight when added to the cost (see
regularization)

:type n_epochs: int
:param n_epochs: maximal number of epochs to run the optimizer

:type dataset: string
:param dataset: the path of the MNIST dataset file from
http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

:type dropout_rates: list
:param dropout_rates: list of the probabilities of retaining a unit in
each layer

"""
datasets = load_data(dataset)
@@ -516,7 +337,7 @@ def test_mlp(learning_rate=0.01, n_epochs=1000, dropout_rates = [0.2, 0.5],
# element is a pair formed from the two lists:
# C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
# Stochastic Gradient Descent (SGD) updates

output = dropout_cost
updates = OrderedDict()
for param, gparam in zip(classifier.params, gparams):
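    # (loop body truncated in this view; given the SGD comment above, the
    # hidden lines presumably perform the standard step, something like
    #     updates[param] = param - learning_rate * gparam
    # — an assumption, not necessarily the committed code)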
39 changes: 20 additions & 19 deletions code/logistic_sgd.py
@@ -58,8 +58,10 @@ class LogisticRegression(object):
determine a class membership probability.
"""

def __init__(self, input, n_in, n_out):
def __init__(self, input, n_in, n_out, W=None, b=None):
""" Initialize the parameters of the logistic regression
Weight matrix W is of shape (n_in, n_out)
and the bias vector b is of shape (n_out,).

:type input: theano.tensor.TensorType
:param input: symbolic variable that describes the input of the
@@ -75,24 +77,23 @@ def __init__(self, input, n_in, n_out):

"""
# start-snippet-1
# initialize with 0 the weights W as a matrix of shape (n_in, n_out)
self.W = theano.shared(
value=numpy.zeros(
(n_in, n_out),
dtype=theano.config.floatX
),
name='W',
borrow=True
)
# initialize the biases b as a vector of n_out 0s
self.b = theano.shared(
value=numpy.zeros(
(n_out,),
dtype=theano.config.floatX
),
name='b',
borrow=True
)
# initialize with 0 the weights W as a matrix of shape (n_in, n_out) if
# the parameter W is None
if W is None:
self.W = theano.shared(
value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX),
name='W')
else:
self.W = W

# initialize the baises b as a vector of n_out 0s if the parameter b is
Member: "biases"

#not none
Member: "is None"

if b is None:
self.b = theano.shared(
value=numpy.zeros((n_out,), dtype=theano.config.floatX),
name='b')
else:
self.b = b

# symbolic expression for computing the matrix of class-membership
# probabilities
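Accepting W and b here is what lets a dropout network and its evaluation-time counterpart share parameters. A usage sketch under that assumption (hypothetical names, not part of the diff):

x = T.matrix('x')
# training-time classifier owns the parameters
clf_train = LogisticRegression(input=x, n_in=784, n_out=10)
# evaluation-time classifier reuses the very same shared variables
clf_eval = LogisticRegression(input=x, n_in=784, n_out=10,
                              W=clf_train.W, b=clf_train.b)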