
Commit 7e678b8

Objective outputs should rescale based on sample_weights
If sample_weights is to be used as a mask as well as for re-weighting, then it's important that, at least when used as a mask, the output be rescaled. Otherwise the order of magnitude of your objective changes purely based on the number of masked entries in your training data: for example, with a mean per-step loss of 0.69 and half the weights zeroed out, the reported loss would drop to roughly 0.35 without the model getting any better.
1 parent 02d5f72 · commit 7e678b8

File tree

keras/models.py
tests/integration_tests/test_temporal_data_tasks.py

2 files changed: +45 -1 lines changed


keras/models.py

Lines changed: 1 addition & 0 deletions

@@ -97,6 +97,7 @@ def weighted(y_true, y_pred, weights, mask=None):
         # apply sample weighting
         if weights is not None:
             score_array *= weights
+            score_array /= K.mean(K.cast(K.not_equal(weights, 0), K.floatx()))
         return K.mean(score_array)
     return weighted
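Taken on its own, the new line computes the fraction of nonzero weights (K.not_equal(weights, 0) is a 0/1 indicator of unmasked entries) and divides the weighted loss by it, so the mean is effectively taken over unmasked entries only. A minimal NumPy sketch of the effect (a hypothetical illustration, not part of the commit; np.mean stands in for the backend's K.mean):

import numpy as np

# Per-timestep losses, all equal so the scale is easy to read off.
score_array = np.full(8, 0.69)
# Sample weights used as a mask: half the entries are zeroed out.
weights = np.array([1., 1., 1., 1., 0., 0., 0., 0.])

masked = score_array * weights
print(np.mean(masked))    # 0.345 -- halved purely because half the data is masked

# The commit's fix: divide by the mean of the nonzero-weight indicator,
# i.e. the fraction of entries that survive the mask.
masked = masked / np.mean((weights != 0).astype('float32'))
print(np.mean(masked))    # 0.69 -- scale independent of how much is masked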

tests/integration_tests/test_temporal_data_tasks.py

Lines changed: 44 additions & 1 deletion

@@ -5,8 +5,9 @@
 
 from keras.utils.test_utils import get_test_data
 from keras.models import Sequential
-from keras.layers.core import TimeDistributedDense, Dropout, Dense
+from keras.layers.core import TimeDistributedDense, Dropout, Dense, Activation
 from keras.layers.recurrent import GRU, LSTM
+from keras.layers.embeddings import Embedding
 from keras.utils.np_utils import to_categorical
 
 
@@ -126,6 +127,48 @@ def test_stacked_lstm_char_prediction():
     # check that it did generate the alphabet correctly
     assert(generated == alphabet)
 
+def test_masked_temporal():
+    '''
+    Confirm that even with masking on both inputs and outputs, cross-entropies are
+    of the expected scale.
+
+    In this task, there are variable-length inputs of integers from 1-9, and a random
+    subset of unmasked outputs. Each of these outputs has a 50% probability of being
+    the input number unchanged, and a 50% probability of being 2*input % 10.
+
+    The ground-truth best cross-entropy loss should, then, be -log(0.5) = 0.69.
+
+    '''
+    np.random.seed(55318)
+    model = Sequential()
+    model.add(Embedding(10, 20, mask_zero=True))
+    model.add(TimeDistributedDense(10))
+    model.add(Activation('softmax'))
+    model.compile(loss='categorical_crossentropy',
+                  optimizer='adam', sample_weight_mode="temporal")
+
+    X = np.random.random_integers(1, 9, (50000, 20))
+    for rowi in range(X.shape[0]):
+        padding = np.random.random_integers(X.shape[1] / 2)
+        X[rowi, :padding] = 0
+
+    # 50% of the time the correct output is the input. The other 50% of the time
+    # it's 2*input % 10.
+    y = (X * np.random.random_integers(1, 2, X.shape)) % 10
+    Y = np.zeros((y.size, 10), dtype='int32')
+    for i, target in enumerate(y.flat):
+        Y[i, target] = 1
+    Y = Y.reshape(y.shape + (10,))
+
+    # Mask 50% of the outputs via sample weights
+    sample_weight = np.random.random_integers(0, 1, y.shape)
+    print("X shape: ", X.shape)
+    print("Y shape: ", Y.shape)
+    print("sample_weight shape: ", sample_weight.shape)
+
+    history = model.fit(X, Y, validation_split=0.05, sample_weight=sample_weight, verbose=1, nb_epoch=2)
+    ground_truth = -np.log(0.5)
+    assert(np.abs(history.history['val_loss'][-1] - ground_truth) < 0.05)
 
 if __name__ == '__main__':
     pytest.main([__file__])
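The docstring's target value checks out: each unmasked timestep has two equally likely correct classes, so the best achievable softmax puts probability 0.5 on each, and the expected categorical cross-entropy is -log(0.5). A quick side calculation (hypothetical, not part of the commit):

import numpy as np

# Two equally likely targets per unmasked timestep; the optimal
# prediction assigns probability 0.5 to each, so the expected
# cross-entropy is H = -(0.5*log(0.5) + 0.5*log(0.5)) = log(2).
p = np.array([0.5, 0.5])
print(-np.sum(p * np.log(p)))    # 0.6931... == -np.log(0.5)

This is why the test asserts val_loss within 0.05 of 0.693: without the rescaling in keras/models.py, masking roughly half the outputs would have pulled the reported loss toward half that value.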
