Commit f27c5b0

Commit message: merge conflict
2 parents: 523e24e + 8d393f7

25 files changed, +696 -410 lines

docs/README.md

Lines changed: 2 additions & 1 deletion
@@ -8,4 +8,5 @@ Our documentation uses extended Markdown, as implemented by [MkDocs](http://mkdo
 - install MkDocs: `pip install mkdocs`
 - `cd` to the `docs/` folder and run:
     - `python autogen.py`
-    - `mkdocs serve`
+    - `mkdocs serve`    # Starts a local webserver: [localhost:8000](localhost:8000)
+    - `mkdocs build`    # Builds a static site in "site" directory

docs/autogen.py

Lines changed: 2 additions & 2 deletions
@@ -117,8 +117,8 @@ def code_snippet(snippet):


 def process_class_docstring(docstring):
-    docstring = re.sub(r'    # (.*)\n',
-                       r'    __\1__\n\n',
+    docstring = re.sub(r'\n    # (.*)\n',
+                       r'\n    __\1__\n\n',
                        docstring)

    docstring = re.sub(r'    ([^\s\\]+):(.*)\n',
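To see why the added '\n' anchors matter, here is a minimal sketch (illustrative input, not part of the commit): the pattern now only rewrites '# Header' markers that start a fresh docstring line, and the replacement re-emits the '\n' it consumed.

import re

doc = "\n    # Arguments\n        x: an input tensor.\n"
print(re.sub(r'\n    # (.*)\n', r'\n    __\1__\n\n', doc))
# -> '\n    __Arguments__\n\n        x: an input tensor.\n'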

docs/templates/objectives.md

Lines changed: 0 additions & 1 deletion
@@ -19,7 +19,6 @@ For a few examples of such functions, check out the [objectives source](https://
 ## Available objectives

 - __mean_squared_error__ / __mse__
-- __root_mean_squared_error__ / __rmse__
 - __mean_absolute_error__ / __mae__
 - __mean_absolute_percentage_error__ / __mape__
 - __mean_squared_logarithmic_error__ / __msle__

examples/mnist_siamese_graph.py

Lines changed: 124 additions & 0 deletions
@@ -0,0 +1,124 @@ (new file)

'''Train a Siamese MLP on pairs of digits from the MNIST dataset.

It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the
output of the shared network and by optimizing the contrastive loss (see
paper for more details).

[1] "Dimensionality Reduction by Learning an Invariant Mapping"
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

Run on GPU: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python mnist_siamese_graph.py

Gets to 99.5% test accuracy after 20 epochs.
3 seconds per epoch on a Titan X GPU.
'''
from __future__ import absolute_import
from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

import random
from keras.datasets import mnist
from keras.models import Sequential, Graph
from keras.layers.core import Dense, Dropout, Lambda
from keras.optimizers import SGD, RMSprop
from keras import backend as K


def euclidean_distance(inputs):
    # with merge_mode='join', the shared node passes its two outputs
    # to this Lambda as a dict, hence the .values() call below
    assert len(inputs) == 2, ('Euclidean distance needs '
                              '2 inputs, %d given' % len(inputs))
    u, v = inputs.values()
    return K.sqrt(K.sum(K.square(u - v), axis=1, keepdims=True))


def contrastive_loss(y, d):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    return K.mean(y * K.square(d) + (1 - y) * K.square(K.maximum(margin - d, 0)))


def create_pairs(x, digit_indices):
    '''Positive and negative pair creation.
    Alternates between positive and negative pairs.
    '''
    pairs = []
    labels = []
    n = min([len(digit_indices[d]) for d in range(10)]) - 1
    for d in range(10):
        for i in range(n):
            z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
            pairs += [[x[z1], x[z2]]]
            inc = random.randrange(1, 10)
            dn = (d + inc) % 10
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs += [[x[z1], x[z2]]]
            labels += [1, 0]
    return np.array(pairs), np.array(labels)


def create_base_network(input_dim):
    '''Base network to be shared (eq. to feature extraction).
    '''
    seq = Sequential()
    seq.add(Dense(128, input_shape=(input_dim,), activation='relu'))
    seq.add(Dropout(0.1))
    seq.add(Dense(128, activation='relu'))
    seq.add(Dropout(0.1))
    seq.add(Dense(128, activation='relu'))
    return seq


def compute_accuracy(predictions, labels):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''
    return labels[predictions.ravel() < 0.5].mean()


# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
input_dim = 784
nb_epoch = 20

# create training+test positive and negative pairs
digit_indices = [np.where(y_train == i)[0] for i in range(10)]
tr_pairs, tr_y = create_pairs(X_train, digit_indices)

digit_indices = [np.where(y_test == i)[0] for i in range(10)]
te_pairs, te_y = create_pairs(X_test, digit_indices)

# network definition
base_network = create_base_network(input_dim)

g = Graph()
g.add_input(name='input_a', input_shape=(input_dim,))
g.add_input(name='input_b', input_shape=(input_dim,))
g.add_shared_node(base_network, name='shared', inputs=['input_a', 'input_b'],
                  merge_mode='join')
g.add_node(Lambda(euclidean_distance), name='d', input='shared')
g.add_output(name='output', input='d')

# train
rms = RMSprop()
g.compile(loss={'output': contrastive_loss}, optimizer=rms)
g.fit({'input_a': tr_pairs[:, 0], 'input_b': tr_pairs[:, 1], 'output': tr_y},
      validation_data={'input_a': te_pairs[:, 0], 'input_b': te_pairs[:, 1], 'output': te_y},
      batch_size=128,
      nb_epoch=nb_epoch)

# compute final accuracy on training and test sets
pred = g.predict({'input_a': tr_pairs[:, 0], 'input_b': tr_pairs[:, 1]})['output']
tr_acc = compute_accuracy(pred, tr_y)
pred = g.predict({'input_a': te_pairs[:, 0], 'input_b': te_pairs[:, 1]})['output']
te_acc = compute_accuracy(pred, te_y)

print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))
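A quick numeric illustration of compute_accuracy (made-up numbers, not part of the example): pairs whose predicted distance falls below the fixed 0.5 threshold are taken as "same class", and the function returns the mean true label among those pairs.

import numpy as np

preds = np.array([[0.1], [0.9], [0.3], [0.7]])  # predicted distances
labels = np.array([1, 0, 1, 0])                 # 1 marks a positive pair
print(labels[preds.ravel() < 0.5].mean())       # -> 1.0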

keras/backend/tensorflow_backend.py

Lines changed: 43 additions & 43 deletions
@@ -154,12 +154,12 @@ def mean(x, axis=None, keepdims=False):
 def any(x, axis=None, keepdims=False):
     '''Bitwise reduction (logical OR).

-    Return array of int8 (0s and 1s).
+    Return array of uint8 (0s and 1s).
     '''
     axis = normalize_axis(axis, ndim(x))
     x = tf.cast(x, tf.bool)
     x = tf.reduce_any(x, reduction_indices=axis, keep_dims=keepdims)
-    return tf.cast(x, tf.int8)
+    return tf.cast(x, tf.uint8)


 def argmax(x, axis=-1):
@@ -289,6 +289,7 @@ def repeat(x, n):
     if x has shape (samples, dim) and n=2,
     the output will have shape (samples, 2, dim)
     '''
+    assert ndim(x) == 2
    tensors = [x] * n
    stacked = tf.pack(tensors)
    return tf.transpose(stacked, (1, 0, 2))
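A NumPy sketch of the shape contract the new assert enforces (illustrative, not from the diff): the stack-then-transpose sequence is hard-coded for rank-2 inputs.

import numpy as np

x = np.arange(6).reshape(3, 2)                    # (samples, dim)
out = np.transpose(np.stack([x] * 2), (1, 0, 2))  # stack n copies, swap axes
print(out.shape)                                  # -> (3, 2, 2)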
@@ -429,54 +430,53 @@ def rnn(step_function, inputs, initial_states,
     axes = [1, 0] + list(range(2, ndim))
     inputs = tf.transpose(inputs, (axes))
     input_list = tf.unpack(inputs)
-    if mask is None:
-        mask = ones_like(tf.slice(inputs, [0, 0, 0], [-1, -1, 1]))
-        inputs_shape = inputs.get_shape()
-
-        # TODO: the mask's shape should be automatically inferred, by
-        # tensorflow yet for some reason it fails to in some test-cases. This
-        # fixes the issue, but should be removed in future.
-        mask.set_shape([inputs_shape[0].value, inputs_shape[1].value, 1])
-        mask = tf.cast(mask, tf.bool)
-    else:
-        # Transpose not supported by bool tensor types, hence round-trip to uint8.
-        mask = tf.cast(tf.transpose(tf.cast(mask, tf.uint8), axes), tf.bool)
-
-    mask_list = tf.unpack(mask)

     states = initial_states
     successive_states = []
     successive_outputs = []
     if go_backwards:
         input_list.reverse()

-    for input, mask_t in zip(input_list, mask_list):
-        output, new_states = step_function(input, states)
-
-        # tf.select needs its condition tensor to be the same shape as its two
-        # result tensors, but in our case the condition (mask) tensor is
-        # (nsamples, 1), and A and B are (nsamples, ndimensions). So we need to
-        # broadcast the mask to match the shape of A and B. That's what the
-        # tile call does, is just repeat the mask along its second dimension
-        # ndimensions times.
-        tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(output)[1]]))
-
-        if len(successive_outputs) == 0:
-            prev_output = zeros_like(output)
-        else:
-            prev_output = successive_outputs[-1]
-
-        output = tf.select(tiled_mask_t, output, prev_output)
-
-        return_states = []
-        for state, new_state in zip(states, new_states):
-            # (see earlier comment for tile explanation)
-            tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(new_state)[1]]))
-            return_states.append(tf.select(tiled_mask_t, new_state, state))
-
-        states = return_states
-        successive_outputs.append(output)
-        successive_states.append(states)
+    if mask is not None:
+        # Transpose not supported by bool tensor types, hence round-trip to uint8.
+        mask = tf.cast(mask, tf.uint8)
+        if len(mask.get_shape()) == ndim-1:
+            mask = expand_dims(mask)
+        mask = tf.cast(tf.transpose(mask, axes), tf.bool)
+        mask_list = tf.unpack(mask)
+
+        for input, mask_t in zip(input_list, mask_list):
+            output, new_states = step_function(input, states)
+
+            # tf.select needs its condition tensor to be the same shape as its two
+            # result tensors, but in our case the condition (mask) tensor is
+            # (nsamples, 1), and A and B are (nsamples, ndimensions). So we need to
+            # broadcast the mask to match the shape of A and B. That's what the
+            # tile call does, is just repeat the mask along its second dimension
+            # ndimensions times.
+            tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(output)[1]]))
+
+            if len(successive_outputs) == 0:
+                prev_output = zeros_like(output)
+            else:
+                prev_output = successive_outputs[-1]
+
+            output = tf.select(tiled_mask_t, output, prev_output)
+
+            return_states = []
+            for state, new_state in zip(states, new_states):
+                # (see earlier comment for tile explanation)
+                tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(new_state)[1]]))
+                return_states.append(tf.select(tiled_mask_t, new_state, state))
+
+            states = return_states
+            successive_outputs.append(output)
+            successive_states.append(states)
+    else:
+        for input in input_list:
+            output, states = step_function(input, states)
+            successive_outputs.append(output)
+            successive_states.append(states)

     last_output = successive_outputs[-1]
     outputs = tf.pack(successive_outputs)
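A NumPy sketch (an assumption drawn from this diff) of the new mask plumbing: a 2D (samples, time) mask gains a trailing feature axis via expand_dims, then is transposed to time-major order before being unpacked per timestep.

import numpy as np

mask = np.array([[1, 1, 0],
                 [1, 0, 0]], dtype='uint8')  # (samples=2, time=3)
mask = mask[:, :, None]                      # expand_dims -> (2, 3, 1)
mask = np.transpose(mask, (1, 0, 2))         # time-major  -> (3, 2, 1)
print(mask.shape)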

keras/backend/theano_backend.py

Lines changed: 11 additions & 16 deletions
@@ -41,18 +41,8 @@ def placeholder(shape=None, ndim=None, dtype=_FLOATX, name=None):
         raise Exception('Specify either a shape or ndim value.')
     if shape is not None:
         ndim = len(shape)
-    if ndim == 0:
-        return T.scalar(name=name, dtype=dtype)
-    elif ndim == 1:
-        return T.vector(name=name, dtype=dtype)
-    elif ndim == 2:
-        return T.matrix(name=name, dtype=dtype)
-    elif ndim == 3:
-        return T.tensor3(name=name, dtype=dtype)
-    elif ndim == 4:
-        return T.tensor4(name=name, dtype=dtype)
-    else:
-        raise Exception('ndim too large: ' + str(ndim))
+    broadcast = (False,) * ndim
+    return T.TensorType(dtype, broadcast)(name)


 def shape(x):
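The rewrite builds the Theano tensor type directly instead of special-casing ranks 0 through 4, which also lifts the old "ndim too large" ceiling. A small equivalence check (a sketch, assuming a working Theano install):

import theano.tensor as T

# (False, False) marks both axes as non-broadcastable, i.e. a matrix type
p = T.TensorType('float32', (False, False))('x')
print(p.type == T.matrix('x', dtype='float32').type)  # -> True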
@@ -281,9 +271,9 @@ def repeat(x, n):
     If x has shape (samples, dim) and n=2,
     the output will have shape (samples, 2, dim).
     '''
-    tensors = [x] * n
-    stacked = T.stack(*tensors)
-    return stacked.dimshuffle((1, 0, 2))
+    assert x.ndim == 2
+    x = x.dimshuffle((0, 'x', 1))
+    return T.extra_ops.repeat(x, n, axis=1)


 def tile(x, n):
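In NumPy terms (a sketch, not from the diff), the new implementation inserts a broadcastable middle axis and repeats along it, matching the TensorFlow backend's output shape.

import numpy as np

x = np.arange(6).reshape(3, 2)             # (samples, dim)
out = np.repeat(x[:, None, :], 2, axis=1)  # add middle axis, repeat it
print(out.shape)                           # -> (3, 2, 2)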
@@ -427,7 +417,7 @@ def rnn(step_function, inputs, initial_states,
             the step function.
         go_backwards: boolean. If True, do the iteration over
             the time dimension in reverse order.
-        mask: binary tensor with shape (samples, time, 1),
+        mask: binary tensor with shape (samples, time),
             with a zero for every element that is masked.

     Returns
@@ -447,7 +437,11 @@ def rnn(step_function, inputs, initial_states,
     inputs = inputs.dimshuffle(axes)

     if mask is not None:
+        if mask.ndim == ndim-1:
+            mask = expand_dims(mask)
+        assert mask.ndim == ndim
         mask = mask.dimshuffle(axes)
+
         # build an all-zero tensor of shape (samples, output_dim)
         initial_output = step_function(inputs[0], initial_states)[0] * 0
         # Theano gets confused by broadcasting patterns in the scan op
@@ -674,6 +668,7 @@ def pool2d(x, pool_size, strides=(1, 1), border_mode='valid',
         pool_out = pool_out.dimshuffle((0, 2, 3, 1))
     return pool_out

+
 # RANDOMNESS

keras/layers/advanced_activations.py

Lines changed: 4 additions & 4 deletions
@@ -6,8 +6,8 @@

 class LeakyReLU(MaskedLayer):
     '''Special version of a Rectified Linear Unit
-    that allows a small gradient when the unit is not active
-    (`f(x) = alpha*x for x < 0`).
+    that allows a small gradient when the unit is not active:
+    `f(x) = alpha*x for x < 0`.

     # Input shape
         Arbitrary. Use the keyword argument `input_shape`
@@ -60,7 +60,7 @@ def __init__(self, init='zero', weights=None, **kwargs):
     def build(self):
         input_shape = self.input_shape[1:]
         self.alphas = self.init(input_shape)
-        self.params = [self.alphas]
+        self.trainable_weights = [self.alphas]

         if self.initial_weights is not None:
             self.set_weights(self.initial_weights)
@@ -142,7 +142,7 @@ def build(self):
         input_shape = self.input_shape[1:]
         self.alphas = K.variable(self.alpha_init * np.ones(input_shape))
         self.betas = K.variable(self.beta_init * np.ones(input_shape))
-        self.params = [self.alphas, self.betas]
+        self.trainable_weights = [self.alphas, self.betas]

         if self.initial_weights is not None:
             self.set_weights(self.initial_weights)
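A hypothetical custom layer (an assumption, not part of this commit) showing the renamed attribute in use: parameters are registered on self.trainable_weights rather than the old self.params, so the optimizer can collect them.

import numpy as np
from keras import backend as K
from keras.layers.core import MaskedLayer

class Scale(MaskedLayer):
    '''Multiplies its input by a learned per-feature factor.'''
    def build(self):
        input_shape = self.input_shape[1:]
        self.gamma = K.variable(np.ones(input_shape))
        self.trainable_weights = [self.gamma]

    def get_output(self, train=False):
        return self.get_input(train) * self.gamma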
