@@ -1,14 +1,10 @@
-
 """
-Notes: Cost function is not implemented for IdentityConvNonlinearity, RectifierConvNonlinearity,
-TanhConvNonlinearity. It is bugged for SigmoidConvNonlinearity, but we are not triggering the
-bug here. The cost function is also not implemented for standard mlp RectifiedLinear or Tanh.
+Note: Cost functions are not implemented for RectifierConvNonlinearity,
+TanhConvNonlinearity, RectifiedLinear, and Tanh. Here we verify that
+cost functions for convolutional layers give the correct output
+by comparing to standard MLPs.
 """
 
-
-"""
-Test costs
-"""
 import numpy as np
 import theano
 import theano.tensor as T
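The strategy the new docstring relies on rests on an identity worth spelling out: a ConvElemwise layer whose kernel covers the entire image emits, per output channel, a single scalar that a Linear layer can reproduce as a dot product, once the kernel flipping performed by true convolution is accounted for (the weight copy later in the patch reverses each flattened kernel for exactly this reason). A minimal standalone numpy sketch of the identity, not part of the patch:

    import numpy as np

    rng = np.random.RandomState(0)
    img = rng.rand(4, 3)   # one single-channel input image
    kern = rng.rand(4, 3)  # full-size kernel for one output channel

    # 'valid' convolution with a kernel the same size as the image
    # yields one scalar; true convolution flips both spatial axes.
    conv_out = np.sum(img * kern[::-1, ::-1])

    # The same scalar as a Linear-layer dot product: flipping both
    # axes of a 2D array and flattening (C order) is the same as
    # flattening and then reversing end-to-end.
    dot_out = img.flatten().dot(kern.flatten()[::-1])

    assert np.allclose(conv_out, dot_out)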
|
|
@@ -18,35 +14,25 @@
 from pylearn2.space import Conv2DSpace
 from pylearn2.models.mlp import SigmoidConvNonlinearity, TanhConvNonlinearity, IdentityConvNonlinearity, RectifierConvNonlinearity
 
-#def test_costs():
-
-# Create fake data
-np.random.seed(12345)
-
-
-r = 31
-s = 21
-shape = [r, s]
-nvis = r*s
-output_channels = 13
-batch_size = 103
+def test_costs():
 
-x = np.random.rand(batch_size, r, s, 1)
-y = np.random.randint(2, size = [batch_size, output_channels, 1 ,1])
+    ImplementedNonLinearities = [(SigmoidConvNonlinearity(), Sigmoid), (IdentityConvNonlinearity(), Linear)]
 
-x = x.astype('float32')
-y = y.astype('float32')
+    for ConvNonlinearity, MLPNonlinearity in ImplementedNonLinearities:
 
-x_mlp = x.flatten().reshape(batch_size, nvis)
-y_mlp = y.flatten().reshape(batch_size, output_channels)
+        # Create fake data
+        np.random.seed(12345)
 
-nonlinearity = IdentityConvNonlinearity()
+        r = 31
+        s = 21
+        shape = [r, s]
+        nvis = r * s
+        output_channels = 13
+        batch_size = 103
 
-# Initialize convnet with random weights.
+        x = np.random.rand(batch_size, r, s, 1).astype('float32')
+        y = np.random.randint(2, size=[batch_size, output_channels, 1, 1]).astype('float32')
 
-conv_model = MLP(
-    input_space = Conv2DSpace(shape = shape, axes = ['b', 0, 1, 'c'], num_channels = 1),
-    layers = [ConvElemwise(layer_name='conv', nonlinearity = nonlinearity, output_channels = output_channels, kernel_shape = shape, pool_shape = [1,1], pool_stride = shape, irange= 1.0)],
-    batch_size = batch_size
-)
+        x_mlp = x.flatten().reshape(batch_size, nvis)
+        y_mlp = y.flatten().reshape(batch_size, output_channels)
 
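(MLP, ConvElemwise, Linear, and Sigmoid are presumably imported in the context lines elided between the two hunks; only the Conv2DSpace and conv-nonlinearity imports are visible above.)

The flatten/reshape pair works because of the axis order chosen for the input space. With axes ('b', 0, 1, 'c') and a single channel, C-order flattening walks batch, then rows, then columns, so each design-matrix row is exactly one flattened image. A quick standalone check of that assumption:

    import numpy as np

    x = np.arange(2 * 3 * 4).reshape(2, 3, 4, 1)  # ('b', 0, 1, 'c'), 1 channel
    x_mlp = x.flatten().reshape(2, 12)

    # Row i of the design matrix is image i, flattened row-major.
    assert np.array_equal(x_mlp[0], x[0, :, :, 0].flatten())
    assert np.array_equal(x_mlp[1], x[1, :, :, 0].flatten())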
|
@@ -53,31 +39,33 @@
-X = conv_model.get_input_space().make_theano_batch()
-Y = conv_model.get_target_space().make_theano_batch()
-Y_hat = conv_model.fprop(X)
-g = theano.function([X], Y_hat)
+        # Initialize convnet with random weights.
 
-# Construct an equivalent MLP which gives the same output.
+        conv_model = MLP(
+            input_space=Conv2DSpace(shape=shape, axes=['b', 0, 1, 'c'], num_channels=1),
+            layers=[ConvElemwise(layer_name='conv', nonlinearity=ConvNonlinearity, output_channels=output_channels, kernel_shape=shape, pool_shape=[1, 1], pool_stride=shape, irange=1.0)],
+            batch_size=batch_size
+        )
 
-mlp_model = MLP(
-    layers = [Linear(dim = output_channels, layer_name = 'mlp', irange = 1.0)],
-    batch_size = batch_size,
-    nvis = nvis
-)
+        X = conv_model.get_input_space().make_theano_batch()
+        Y = conv_model.get_target_space().make_theano_batch()
+        Y_hat = conv_model.fprop(X).flatten()
+        g = theano.function([X], Y_hat)
 
-W, b = conv_model.get_param_values()
-W = W.astype('float32')
-b = b.astype('float32')
-W_mlp = np.zeros(shape = (output_channels, nvis))
-for k in range(output_channels):
-    W_mlp[k] = W[k, 0].flatten()[::-1]
-W_mlp = W_mlp.T
-b_mlp = b.flatten()
-W_mlp = W_mlp.astype('float32')
-b_mlp = b_mlp.astype('float32')
-mlp_model.set_param_values([W_mlp, b_mlp])
+        # Construct an equivalent MLP which gives the same output after flattening.
 
-X1 = mlp_model.get_input_space().make_theano_batch()
-Y1 = mlp_model.get_target_space().make_theano_batch()
-Y1_hat = mlp_model.fprop(X1)
-f = theano.function([X1], Y1_hat)
+        mlp_model = MLP(
+            layers=[MLPNonlinearity(dim=output_channels, layer_name='mlp', irange=1.0)],
+            batch_size=batch_size,
+            nvis=nvis
+        )
 
+        W, b = conv_model.get_param_values()
+        W = W.astype('float32')
+        b = b.astype('float32')
+        W_mlp = np.zeros(shape=(output_channels, nvis))
+        for k in range(output_channels):
+            W_mlp[k] = W[k, 0].flatten()[::-1]
+        W_mlp = W_mlp.T
+        b_mlp = b.flatten()
+        W_mlp = W_mlp.astype('float32')
+        b_mlp = b_mlp.astype('float32')
+        mlp_model.set_param_values([W_mlp, b_mlp])
 
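The weight-copy loop above builds the Linear weight matrix one output channel at a time: the end-to-end reversal undoes the kernel flip of true convolution, and the final .T puts channels in columns, matching the (nvis, dim) layout the patch feeds to set_param_values. A standalone numpy sketch of the same transplant, with made-up small sizes:

    import numpy as np

    rng = np.random.RandomState(0)
    r, s, channels = 3, 2, 4
    W = rng.rand(channels, 1, r, s)  # kernels: (out ch, in ch, rows, cols)
    x = rng.rand(r, s)               # one single-channel image

    # Conv layer output per channel: full-image 'valid' convolution
    # (kernel flipped on both spatial axes).
    conv_out = np.array([np.sum(x * W[k, 0, ::-1, ::-1])
                         for k in range(channels)])

    # Equivalent Linear weights: reversed, flattened kernels as columns.
    W_mlp = np.stack([W[k, 0].flatten()[::-1] for k in range(channels)]).T
    lin_out = x.flatten().dot(W_mlp)

    assert np.allclose(conv_out, lin_out)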
|
@@ -84,13 +72,12 @@
-# Check that the two models give the same throughput
-assert np.linalg.norm(f(x_mlp).flatten() - g(x).flatten()) < 10**-3
-print "f-prop ok"
+        X1 = mlp_model.get_input_space().make_theano_batch()
+        Y1 = mlp_model.get_target_space().make_theano_batch()
+        Y1_hat = mlp_model.fprop(X1).flatten()
+        f = theano.function([X1], Y1_hat)
 
-# Cost functions:
-mlp_cost = theano.function([X1, Y1], mlp_model.cost(Y1, Y1_hat))
-print "mlp_cost = "+str(mlp_cost(x_mlp, y_mlp))
 
-batch_axis = T.scalar()
-conv_cost = theano.function([X, Y], conv_model.cost(Y, Y_hat))
-print "conv_cost = "+str(conv_cost(x,y))
+        # Check that the two models give the same output
+        assert np.linalg.norm(f(x_mlp) - g(x)) < 10**-3
 
+if __name__ == "__main__":
+    test_costs()
 
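One loose end: the docstring says the test verifies cost functions, but the assertion above only compares forward passes; the cost comparison from the old file (mlp_cost / conv_cost) was dropped. If that check is reinstated, a sketch along the lines of the removed code, using unflattened fprop outputs so that MLP.cost sees batches in each model's own output space, might look like the following (hypothetical, not part of the patch):

    # Inside the loop, after the fprop check; reuses the removed calls.
    Y_hat_conv = conv_model.fprop(X)   # unflattened, in the conv output space
    Y1_hat_mlp = mlp_model.fprop(X1)   # unflattened, in the MLP output space

    mlp_cost = theano.function([X1, Y1], mlp_model.cost(Y1, Y1_hat_mlp))
    conv_cost = theano.function([X, Y], conv_model.cost(Y, Y_hat_conv))

    assert abs(mlp_cost(x_mlp, y_mlp) - conv_cost(x, y)) < 10**-3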