
Commit cfa2018

Author: Mathew Donald Rogers

Fixed cost functions for IdentityConvNonlinearity and SigmoidConvNonlinearity classes.

1 parent 7c6249f commit cfa2018

File tree

2 files changed: +82, -33 lines


pylearn2/models/mlp.py

Lines changed: 65 additions & 22 deletions
@@ -2673,6 +2673,13 @@ def get_monitoring_channels_from_state(self, state, target,
         rval = self._get_monitoring_channels_for_activations(state)
 
         return rval
+
+    @wraps(Layer.cost)
+    def cost(self, Y, Y_hat):
+        raise NotImplementedError(
+            str(type(self)) + " does not implement cost function.")
+
+
 
 
 class IdentityConvNonlinearity(ConvNonlinearity):
@@ -2701,6 +2708,30 @@ get_monitoring_channels_from_state(self,
             rval["misclass"] = T.cast(incorrect, config.floatX).mean()
 
         return rval
+
+    @wraps(Linear.cost)
+    def cost(self, Y, Y_hat, batch_axis):
+        """
+        Parameters
+        ----------
+        Y : theano.gof.Variable
+            Output of `fprop`
+        Y_hat : theano.gof.Variable
+            Targets
+        batch_axis : integer
+            axis representing batch dimension
+
+        Returns
+        -------
+        cost : theano.gof.Variable
+            0-D tensor describing the cost
+
+        Notes
+        -----
+        Mean squared error across batch
+        """
+        return T.sum(T.mean(T.sqr(Y - Y_hat), axis=batch_axis))
+
 
 
 class RectifierConvNonlinearity(ConvNonlinearity):
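
For intuition, the new identity cost averages the squared error over the batch axis and then sums over the remaining unit axes. A minimal NumPy sketch of the same reduction (the function name and shapes here are illustrative, not from the commit):

import numpy as np

def identity_cost(Y, Y_hat, batch_axis=0):
    # Mirrors T.sum(T.mean(T.sqr(Y - Y_hat), axis=batch_axis)):
    # mean of the squared error over the batch, summed over units.
    return np.sum(np.mean(np.square(Y - Y_hat), axis=batch_axis))

Y = np.random.rand(103, 13)      # (batch, output_channels)
Y_hat = np.random.rand(103, 13)
print identity_cost(Y, Y_hat)    # a single scalar, like the 0-D tensor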
@@ -2813,6 +2844,33 @@ def get_monitoring_channels_from_state(self, state, target,
             rval['per_output_f1_min'] = f1.min()
 
         return rval
+
+    @wraps(Linear.cost)
+    def cost(self, Y, Y_hat, batch_axis):
+        """
+        Parameters
+        ----------
+        Y : theano.gof.Variable
+            Output of `fprop`
+        Y_hat : theano.gof.Variable
+            Targets
+        batch_axis : integer
+            axis representing batch dimension
+
+        Returns
+        -------
+        cost : theano.gof.Variable
+            0-D tensor describing the cost
+
+        Notes
+        -----
+        Cost mean across units, mean across batch of KL divergence
+        KL(P || Q) where P is defined by Y and Q is defined by Y_hat
+        KL(P || Q) = p log p - p log q + (1-p) log (1-p) - (1-p) log (1-q)
+        """
+        ave_total = kl(Y=Y, Y_hat=Y_hat, batch_axis=batch_axis)
+        ave = ave_total.mean()
+        return ave
 
 
 class TanhConvNonlinearity(ConvNonlinearity):
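
As a worked check of the docstring's KL formula, here is a small NumPy sketch; the function name and the eps clipping are my own additions, not a reproduction of pylearn2's kl helper:

import numpy as np

def bernoulli_kl(p, q, eps=1e-7):
    # Elementwise KL(P || Q) for Bernoulli parameters, following
    # p log p - p log q + (1-p) log (1-p) - (1-p) log (1-q);
    # eps keeps the logs finite when p or q is exactly 0 or 1.
    p = np.clip(p, eps, 1 - eps)
    q = np.clip(q, eps, 1 - eps)
    return p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))

p = np.array([0.1, 0.5, 0.9])
q = np.array([0.1, 0.4, 0.5])
print bernoulli_kl(p, q)   # zero where p == q, positive elsewhere

The commit's cost then takes the mean of this quantity across units and batch, as the Notes section above states.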
@@ -3248,41 +3306,26 @@ def fprop(self, state_below):
             p = self.output_normalization(p)
 
         return p
-
+
+    @wraps(Layer.cost)
     def cost(self, Y, Y_hat):
         """
-        Cost for convnets is hardcoded to be the cost for sigmoids.
-        TODO: move the cost into the non-linearity class.
-
         Parameters
         ----------
         Y : theano.gof.Variable
-            Output of `fprop`
+            Output of `fprop`
         Y_hat : theano.gof.Variable
             Targets
 
         Returns
         -------
         cost : theano.gof.Variable
             0-D tensor describing the cost
-
-        Notes
-        -----
-        Cost mean across units, mean across batch of KL divergence
-        KL(P || Q) where P is defined by Y and Q is defined by Y_hat
-        KL(P || Q) = p log p - p log q + (1-p) log (1-p) - (1-p) log (1-q)
-        """
-        assert self.nonlin.non_lin_name == "sigmoid", ("ConvElemwise "
-                                                       "supports "
-                                                       "cost function "
-                                                       "for only "
-                                                       "sigmoid layer "
-                                                       "for now.")
+        """
+
         batch_axis = self.output_space.get_batch_axis()
-        ave_total = kl(Y=Y, Y_hat=Y_hat, batch_axis=batch_axis)
-        ave = ave_total.mean()
-        return ave
-
+        return self.nonlin.cost(Y=Y, Y_hat=Y_hat, batch_axis=batch_axis)
+
 
 
 class ConvRectifiedLinear(ConvElemwise):
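The net effect of that last hunk: ConvElemwise no longer hardcodes the sigmoid KL cost behind an assert; it delegates to whichever nonlinearity object it holds. A condensed, NumPy-only sketch of the dispatch (class names echo the commit, but the bodies are simplified stand-ins rather than pylearn2's real implementations):

import numpy as np

class ConvNonlinearity(object):
    def cost(self, Y, Y_hat, batch_axis):
        # Base behaviour added by this commit: refuse unless overridden.
        raise NotImplementedError(
            str(type(self)) + " does not implement cost function.")

class IdentityConvNonlinearity(ConvNonlinearity):
    def cost(self, Y, Y_hat, batch_axis):
        # Mean squared error across the batch, summed over units.
        return np.sum(np.mean(np.square(Y - Y_hat), axis=batch_axis))

class FakeConvElemwise(object):
    # Hypothetical stand-in for the real layer; it only shows the dispatch.
    def __init__(self, nonlin, batch_axis=0):
        self.nonlin = nonlin
        self.batch_axis = batch_axis

    def cost(self, Y, Y_hat):
        # No sigmoid-only assert any more: forward to the nonlinearity.
        return self.nonlin.cost(Y=Y, Y_hat=Y_hat, batch_axis=self.batch_axis)

layer = FakeConvElemwise(IdentityConvNonlinearity())
Y = np.random.rand(4, 3)
print layer.cost(Y, np.zeros((4, 3)))   # dispatches; the base class would raise

This keeps each cost next to the activation it describes, so later adding a cost for Tanh or Rectifier only touches the corresponding subclass.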

pylearn2/models/tests/test_costs.py

Lines changed: 17 additions & 11 deletions
@@ -1,7 +1,8 @@
 
 """
-Notes: Cost function is not implemented for IdentityConvNonlinearity, RectifierConvNonlinearity, TanhConvNonlinearity. It is bugged for SigmoidConvNonlinearity, but we are
-not triggering that bug here. The cost function is not implemented for standard mlp RectifiedLinear or Tanh.
+Notes: Cost function is not implemented for IdentityConvNonlinearity, RectifierConvNonlinearity,
+TanhConvNonlinearity. It is bugged for SigmoidConvNonlinearity, but we are not triggering the
+bug here. The cost function is also not implemented for standard mlp RectifiedLinear or Tanh.
 """
 
 
@@ -17,25 +18,25 @@
 from pylearn2.space import Conv2DSpace
 from pylearn2.models.mlp import SigmoidConvNonlinearity, TanhConvNonlinearity, IdentityConvNonlinearity, RectifierConvNonlinearity
 
-
-
-
 #def test_costs():
 
 # Create fake data
 np.random.seed(12345)
 
 
-r = 13
-s = 11
+r = 31
+s = 21
 shape = [r, s]
 nvis = r*s
-output_channels = 17
-batch_size = 1
+output_channels = 13
+batch_size = 103
 
 x = np.random.rand(batch_size, r, s, 1)
 y = np.random.randint(2, size=[batch_size, output_channels, 1, 1])
 
+x = x.astype('float32')
+y = y.astype('float32')
+
 x_mlp = x.flatten().reshape(batch_size, nvis)
 y_mlp = y.flatten().reshape(batch_size, output_channels)
 

@@ -63,11 +64,15 @@
 )
 
 W, b = conv_model.get_param_values()
+W = W.astype('float32')
+b = b.astype('float32')
 W_mlp = np.zeros(shape=(output_channels, nvis))
 for k in range(output_channels):
     W_mlp[k] = W[k, 0].flatten()[::-1]
 W_mlp = W_mlp.T
 b_mlp = b.flatten()
+W_mlp = W_mlp.astype('float32')
+b_mlp = b_mlp.astype('float32')
 mlp_model.set_param_values([W_mlp, b_mlp])
 
 X1 = mlp_model.get_input_space().make_theano_batch()
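
The [::-1] above undoes the kernel flip that convolution performs: a 'valid' convolution whose kernel is as large as its input (which the test's 1x1 conv output shape implies) equals a dot product against the reversed, flattened kernel. A small check of that identity, using scipy rather than Theano and illustrative shapes:

import numpy as np
from scipy.signal import convolve2d

img = np.random.rand(5, 4)
kern = np.random.rand(5, 4)

# 'valid' convolution with a full-size kernel yields a single number...
conv_out = convolve2d(img, kern, mode='valid')[0, 0]

# ...equal to a dot product with the flipped, flattened kernel, which is
# exactly how the test builds each row of W_mlp from W[k, 0].
mlp_out = img.flatten().dot(kern.flatten()[::-1])

assert abs(conv_out - mlp_out) < 1e-10
print "kernel-flip identity ok"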
@@ -77,13 +82,14 @@
 
 
 # Check that the two models give the same throughput
-assert np.linalg.norm(f(x_mlp).flatten() - g(x).flatten()) < 10**-10
-print "Fprop ok"
+assert np.linalg.norm(f(x_mlp).flatten() - g(x).flatten()) < 10**-3
+print "f-prop ok"
 
 # Cost functions:
 mlp_cost = theano.function([X1, Y1], mlp_model.cost(Y1, Y1_hat))
 print "mlp_cost = "+str(mlp_cost(x_mlp, y_mlp))
 
+batch_axis = T.scalar()
 conv_cost = theano.function([X, Y], conv_model.cost(Y, Y_hat))
 print "conv_cost = "+str(conv_cost(x, y))
 