Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions code/DBN.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
c.append(pretraining_fns[i](index=batch_index,
lr=pretrain_lr))
print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ')
print(numpy.mean(c))
print(numpy.mean(c, dtype='float64'))

end_time = timeit.default_timer()
# end-snippet-2
Expand Down Expand Up @@ -391,7 +391,7 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
if (iter + 1) % validation_frequency == 0:

validation_losses = validate_model()
this_validation_loss = numpy.mean(validation_losses)
this_validation_loss = numpy.mean(validation_losses, dtype='float64')
print('epoch %i, minibatch %i/%i, validation error %f %%' % (
epoch,
minibatch_index + 1,
Expand All @@ -414,7 +414,7 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100,

# test it on the test set
test_losses = test_model()
test_score = numpy.mean(test_losses)
test_score = numpy.mean(test_losses, dtype='float64')
print((' epoch %i, minibatch %i/%i, test error of '
'best model %f %%') %
(epoch, minibatch_index + 1, n_train_batches,
Expand Down
8 changes: 4 additions & 4 deletions code/SdA.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
Expand Down Expand Up @@ -394,7 +394,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
c.append(pretraining_fns[i](index=batch_index,
corruption=corruption_levels[i],
lr=pretrain_lr))
print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(c)))
print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(c, dtype='float64')))

end_time = timeit.default_timer()

Expand Down Expand Up @@ -442,7 +442,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,

if (iter + 1) % validation_frequency == 0:
validation_losses = validate_model()
this_validation_loss = numpy.mean(validation_losses)
this_validation_loss = numpy.mean(validation_losses, dtype='float64')
print('epoch %i, minibatch %i/%i, validation error %f %%' %
(epoch, minibatch_index + 1, n_train_batches,
this_validation_loss * 100.))
Expand All @@ -463,7 +463,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,

# test it on the test set
test_losses = test_model()
test_score = numpy.mean(test_losses)
test_score = numpy.mean(test_losses, dtype='float64')
print((' epoch %i, minibatch %i/%i, test error of '
'best model %f %%') %
(epoch, minibatch_index + 1, n_train_batches,
Expand Down
6 changes: 3 additions & 3 deletions code/dA.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

from logistic_sgd import load_data
from utils import tile_raster_images
Expand Down Expand Up @@ -336,7 +336,7 @@ def test_dA(learning_rate=0.1, training_epochs=15,
for batch_index in range(n_train_batches):
c.append(train_da(batch_index))

print('Training epoch %d, cost ' % epoch, numpy.mean(c))
print('Training epoch %d, cost ' % epoch, numpy.mean(c, dtype='float64'))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know what is going on with numpy.mean? If c contains float16 values, I checked that numpy outputs float16. In my tests, it seems the accumulator is in float16 or something like that. Do you know?

We would need to document this float16 behavior in Theano, at least in this issue: Theano/Theano#2908. I'll let you modify it, in case you can add more information.

Should we special-case float16 and make Theano always return at least float32, to help prevent that type of problem?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the accumulator is float16 internally and overflows.

Do we have a page about float16 gotchas in Theano? This is the only place where I would expect to find this sort of information.

I would oppose special-casing outputs in Theano, because the problem is easily resolved by the user and very visible.


end_time = timeit.default_timer()

Expand Down Expand Up @@ -394,7 +394,7 @@ def test_dA(learning_rate=0.1, training_epochs=15,
for batch_index in range(n_train_batches):
c.append(train_da(batch_index))

print('Training epoch %d, cost ' % epoch, numpy.mean(c))
print('Training epoch %d, cost ' % epoch, numpy.mean(c, dtype='float64'))

end_time = timeit.default_timer()

Expand Down
9 changes: 5 additions & 4 deletions code/hmc/hmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from theano import function, shared
from theano import tensor as TT
import theano
import theano.sandbox.rng_mrg

sharedX = (lambda X, name:
shared(numpy.asarray(X, dtype=theano.config.floatX), name=name))
Expand Down Expand Up @@ -275,14 +276,14 @@ def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept,

"""

## POSITION UPDATES ##
# POSITION UPDATES #
# broadcast `accept` scalar to tensor with the same dimensions as
# final_pos.
accept_matrix = accept.dimshuffle(0, *(('x',) * (final_pos.ndim - 1)))
# if accept is True, update to `final_pos` else stay put
new_positions = TT.switch(accept_matrix, final_pos, positions)
# end-snippet-5 start-snippet-7
## STEPSIZE UPDATES ##
# STEPSIZE UPDATES #
# if acceptance rate is too low, our sampler is too "noisy" and we reduce
# the stepsize. If it is too high, our sampler is too conservative, we can
# get away with a larger stepsize (resulting in better mixing).
Expand All @@ -292,7 +293,7 @@ def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept,
new_stepsize = TT.clip(_new_stepsize, stepsize_min, stepsize_max)

# end-snippet-7 start-snippet-6
## ACCEPT RATE UPDATES ##
# ACCEPT RATE UPDATES #
# perform exponential moving average
mean_dtype = theano.scalar.upcast(accept.dtype, avg_acceptance_rate.dtype)
new_acceptance_rate = TT.add(
Expand Down Expand Up @@ -358,7 +359,7 @@ def new_from_shared_positions(
stepsize = sharedX(initial_stepsize, 'hmc_stepsize')
avg_acceptance_rate = sharedX(target_acceptance_rate,
'avg_acceptance_rate')
s_rng = TT.shared_randomstreams.RandomStreams(seed)
s_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(seed)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AttributeError: 'module' object has no attribute 'sandbox'


# define graph for an `n_steps` HMC simulation
accept, final_pos = hmc_move(
Expand Down
4 changes: 2 additions & 2 deletions code/lstm.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,8 +605,8 @@ def train_lstm(
best_p = unzip(tparams)
bad_counter = 0

print( ('Train ', train_err, 'Valid ', valid_err,
'Test ', test_err) )
print('Train ', train_err, 'Valid ', valid_err,
'Test ', test_err)

if (len(history_errs) > patience and
valid_err >= numpy.array(history_errs)[:-patience,
Expand Down
2 changes: 1 addition & 1 deletion code/rbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import theano.tensor as T
import os

from theano.tensor.shared_randomstreams import RandomStreams
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

from utils import tile_raster_images
from logistic_sgd import load_data
Expand Down
2 changes: 1 addition & 1 deletion code/rnnrbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from midi.utils import midiread, midiwrite
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

#Don't use a Python long as this doesn't work on 32-bit computers.
numpy.random.seed(0xbeef)
Expand Down