Adding Loss, LossFunctionWrapper, MeanSquaredError classes. (#12859)

pavithrasv · fchollet · commit 0792332f77e6 · 2019-05-23T17:03:04.000-07:00
* Adding Loss, LossFunctionWrapper, MeanSquaredError classes.

* Fixing formatting issues.

* Adding arguments list to MeanSquaredError.

* Fix abstract method
diff --git a/keras/backend/__init__.py b/keras/backend/__init__.py
@@ -149,6 +149,7 @@
 from .load_backend import name_scope
 from .load_backend import symbolic
 from .load_backend import eager
+from .load_backend import size
 
 if backend() == 'theano':
     from .load_backend import pattern_broadcast
diff --git a/keras/backend/cntk_backend.py b/keras/backend/cntk_backend.py
@@ -559,6 +559,10 @@ def cast(x, dtype):
     return x
 
 
+def size(x, name=None):  # element count of `x`: sums a tensor of ones shaped like `x`
+    return sum(ones_like(x, name=name))  # NOTE(review): presumably this module's `sum` - confirm
+
+
 def dot(x, y):
     if len(x.shape) > 2 or len(y.shape) > 2:
         y_shape = int_shape(y)
diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py
@@ -831,6 +831,29 @@ def ndim(x):
     return x.shape.rank
 
 
+def size(x, name=None):
+    """Returns the size of a tensor.
+
+    # Arguments
+        x: Tensor or variable.
+        name: A name for the operation (optional).
+
+    # Returns
+        Size of the tensor.
+
+    # Examples
+    ```python
+    >>> from keras import backend as K
+    >>> val = np.array([[1, 2], [3, 4]])
+    >>> kvar = K.variable(value=val)
+    >>> K.size(kvar)
+    <tf.Tensor: id=9, shape=(), dtype=int32, numpy=4>
+    ```
+
+    """
+    return tf.size(x, name=name)
+
+
 def dtype(x):
     """Returns the dtype of a Keras tensor or variable, as a string.
 
diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py
@@ -374,6 +374,18 @@ def count_params(x):
 
 def cast(x, dtype):
     return T.cast(x, dtype)
+
+
+def size(x, name=None):
+    """Returns the size of a tensor.
+
+    # Arguments
+        x: The input tensor.
+        name: A name for the operation (optional).
+    # Returns
+        Size of the tensor.
+    """
+    return sum(ones_like(x, name=name))
 
 
 # UPDATES OPS
diff --git a/keras/losses.py b/keras/losses.py
@@ -4,12 +4,174 @@
 from __future__ import division
 from __future__ import print_function
 
+import abc
 import six
+
 from . import backend as K
+from .utils import losses_utils
 from .utils.generic_utils import deserialize_keras_object
 from .utils.generic_utils import serialize_keras_object
 
 
+@six.add_metaclass(abc.ABCMeta)
+class Loss(object):
+    """Loss base class.
+
+    To be implemented by subclasses:
+        * `call()`: Computes the loss values from `y_true` and `y_pred`.
+
+    Example subclass implementation:
+    ```python
+    class MeanSquaredError(Loss):
+        def call(self, y_true, y_pred):
+            # `K` is the Keras backend (`from keras import backend as K`).
+            squared_error = K.square(y_pred - y_true)
+            return K.mean(squared_error, axis=-1)
+    ```
+
+    # Arguments
+        reduction: (Optional) Type of loss reduction to apply to loss.
+            Default value is `SUM_OVER_BATCH_SIZE`.
+        name: Optional name for the op.
+    """
+
+    def __init__(self,
+                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
+                 name=None):
+        self.reduction = reduction
+        self.name = name
+
+    def __call__(self, y_true, y_pred, sample_weight=None):
+        """Invokes the `Loss` instance.
+
+        # Arguments
+            y_true: Ground truth values.
+            y_pred: The predicted values.
+            sample_weight: Optional `Tensor` whose rank is either 0, or the same rank
+            as `y_true`, or is broadcastable to `y_true`. `sample_weight` acts as a
+            coefficient for the loss. If a scalar is provided, then the loss is
+            simply scaled by the given value. If `sample_weight` is a tensor of size
+            `[batch_size]`, then the total loss for each sample of the batch is
+            rescaled by the corresponding element in the `sample_weight` vector. If
+            the shape of `sample_weight` matches the shape of `y_pred`, then the
+            loss of each measurable element of `y_pred` is scaled by the
+            corresponding value of `sample_weight`.
+
+        # Returns
+            Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
+                shape as `y_true`; otherwise, it is scalar.
+
+        # Raises
+            ValueError: If the shape of `sample_weight` is invalid.
+        """
+        # If we are wrapping a lambda function strip '<>' from the name as it is not
+        # accepted in scope name.
+        scope_name = 'lambda' if self.name == '<lambda>' else self.name
+        with K.name_scope(scope_name):
+            losses = self.call(y_true, y_pred)
+            return losses_utils.compute_weighted_loss(
+                losses, sample_weight, reduction=self.reduction)
+
+    @classmethod
+    def from_config(cls, config):
+        """Instantiates a `Loss` from its config (output of `get_config()`).
+
+        # Arguments
+            config: Output of `get_config()`.
+
+        # Returns
+            A `Loss` instance.
+        """
+        return cls(**config)
+
+    def get_config(self):  # keys mirror the `__init__` keyword arguments
+        return {'reduction': self.reduction, 'name': self.name}
+
+    @abc.abstractmethod
+    def call(self, y_true, y_pred):
+        """Computes the loss values; subclasses must override this method.
+
+        # Arguments
+            y_true: Ground truth values, with the same shape as 'y_pred'.
+            y_pred: The predicted values.
+        """
+        raise NotImplementedError('Must be implemented in subclasses.')
+
+
+class LossFunctionWrapper(Loss):
+    """Wraps a loss function in the `Loss` class.
+
+    # Arguments
+        fn: The loss function to wrap, with signature `fn(y_true, y_pred,
+            **kwargs)`.
+        reduction: (Optional) Type of loss reduction to apply to loss.
+            Default value is `SUM_OVER_BATCH_SIZE`.
+        name: (Optional) name for the loss.
+        **kwargs: The keyword arguments that are passed on to `fn`.
+    """
+
+    def __init__(self,
+                 fn,
+                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
+                 name=None,
+                 **kwargs):
+        super(LossFunctionWrapper, self).__init__(reduction=reduction, name=name)
+        self.fn = fn
+        self._fn_kwargs = kwargs
+
+    def call(self, y_true, y_pred):
+        """Calls the wrapped `fn` with the stored keyword arguments.
+
+        # Arguments
+            y_true: Ground truth values.
+            y_pred: The predicted values.
+
+        # Returns
+            Loss values per sample.
+        """
+        return self.fn(y_true, y_pred, **self._fn_kwargs)
+
+    def get_config(self):  # base config plus the `fn` kwargs, tensors evaluated first
+        config = {}
+        for k, v in six.iteritems(self._fn_kwargs):
+            config[k] = K.eval(v) if K.is_tensor(v) else v
+        base_config = super(LossFunctionWrapper, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+
+class MeanSquaredError(LossFunctionWrapper):
+    """Computes the mean of squares of errors between labels and predictions.
+
+    For example, if `y_true` is [0., 0., 1., 1.] and `y_pred` is [1., 1., 1., 0.]
+    then the mean squared error value is 3/4 (0.75).
+
+    Standalone usage:
+
+    ```python
+    mse = keras.losses.MeanSquaredError()
+    loss = mse([0., 0., 1., 1.], [1., 1., 1., 0.])
+    ```
+
+    Usage with the `compile` API:
+
+    ```python
+    model = keras.Model(inputs, outputs)
+    model.compile('sgd', loss=keras.losses.MeanSquaredError())
+    ```
+
+    # Arguments
+        reduction: (Optional) Type of loss reduction to apply to loss.
+            Default value is `SUM_OVER_BATCH_SIZE`.
+        name: (Optional) name for the loss.
+    """
+
+    def __init__(self,
+                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
+                 name='mean_squared_error'):
+        super(MeanSquaredError, self).__init__(  # wraps `mean_squared_error`
+            mean_squared_error, name=name, reduction=reduction)
+
+
 def mean_squared_error(y_true, y_pred):
     return K.mean(K.square(y_pred - y_true), axis=-1)
 
diff --git a/keras/utils/__init__.py b/keras/utils/__init__.py
@@ -4,6 +4,7 @@
 from . import data_utils
 from . import io_utils
 from . import conv_utils
+from . import losses_utils
 
 # Globally-importable utils.
 from .io_utils import HDF5Matrix
diff --git a/keras/utils/losses_utils.py b/keras/utils/losses_utils.py
@@ -0,0 +1,136 @@
+"""Utilities related to losses."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from .. import backend as K
+
+
+class Reduction(object):
+    """Types of loss reduction.
+
+    Contains the following values:
+
+    * `NONE`: Un-reduced weighted losses with the same shape as input. When this
+        reduction type is used with built-in Keras training loops like
+        `fit`/`evaluate`, the unreduced vector loss is passed to the optimizer but
+        the reported loss will be a scalar value.
+    * `SUM`: Scalar sum of weighted losses.
+    * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses.
+    """
+
+    NONE = 'none'
+    SUM = 'sum'
+    SUM_OVER_BATCH_SIZE = 'sum_over_batch_size'
+
+    @classmethod
+    def all(cls):  # tuple of every supported reduction key
+        return (cls.NONE, cls.SUM, cls.SUM_OVER_BATCH_SIZE)
+
+    @classmethod
+    def validate(cls, key):  # raises ValueError when `key` is not in `all()`
+        if key not in cls.all():
+            raise ValueError('Invalid Reduction Key %s.' % key)
+
+
+def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight):
+    """Squeeze or expand last dimension if needed.
+
+    1. Squeezes a size-1 last dim of `y_pred` or `y_true` if ranks differ by 1.
+    2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
+    from the new rank of `y_pred`.
+    If `sample_weight` is scalar, it is kept scalar.
+
+    # Arguments
+        y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
+        y_true: Optional label `Tensor` whose dimensions match `y_pred`.
+        sample_weight: Optional weight scalar or `Tensor` whose dimensions match
+            `y_pred`.
+
+    # Returns
+        Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
+        the last dimension squeezed, `sample_weight` could be extended by one
+        dimension.
+    """
+    if y_true is not None:
+        y_pred_rank = K.ndim(y_pred)
+        y_pred_shape = K.int_shape(y_pred)
+        y_true_rank = K.ndim(y_true)
+        y_true_shape = K.int_shape(y_true)
+
+        if (y_pred_rank - y_true_rank == 1) and (y_pred_shape[-1] == 1):
+            y_pred = K.squeeze(y_pred, -1)
+        elif (y_true_rank - y_pred_rank == 1) and (y_true_shape[-1] == 1):
+            y_true = K.squeeze(y_true, -1)
+
+    if sample_weight is None:
+        return y_pred, y_true, None
+
+    y_pred_rank = K.ndim(y_pred)  # re-read: `y_pred` may have been squeezed above
+    weights_rank = K.ndim(sample_weight)
+    if weights_rank != 0:  # rank-0 (scalar) weights are returned unchanged
+        if weights_rank - y_pred_rank == 1:
+            sample_weight = K.squeeze(sample_weight, -1)
+        elif y_pred_rank - weights_rank == 1:
+            sample_weight = K.expand_dims(sample_weight, -1)
+    return y_pred, y_true, sample_weight
+
+
+def _num_elements(losses):
+    """Returns the element count of `losses`, cast to the dtype of `losses`."""
+    with K.name_scope('num_elements') as scope:
+        return K.cast(K.size(losses, name=scope), losses.dtype)
+
+
+def reduce_weighted_loss(weighted_losses, reduction=Reduction.SUM_OVER_BATCH_SIZE):
+    """Reduces `weighted_losses` according to `reduction`."""
+    if reduction == Reduction.NONE:
+        loss = weighted_losses  # `NONE`: returned without any reduction
+    else:
+        loss = K.sum(weighted_losses)  # every other key starts from the total sum
+        if reduction == Reduction.SUM_OVER_BATCH_SIZE:
+            loss = loss / _num_elements(weighted_losses)  # sum / element count
+    return loss
+
+
+def compute_weighted_loss(losses,
+                          sample_weight=None,
+                          reduction=Reduction.SUM_OVER_BATCH_SIZE,
+                          name=None):
+    """Computes the weighted loss.
+
+    # Arguments
+        losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
+        sample_weight: Optional `Tensor` whose rank is either 0, or the same rank
+            as `losses`, or is broadcastable to `losses`.
+        reduction: (Optional) Type of Reduction to apply to loss.
+            Default value is `SUM_OVER_BATCH_SIZE`.
+        name: Optional name for the op.
+
+    # Raises
+        ValueError: If the shape of `sample_weight` is not compatible with `losses`.
+
+    # Returns
+        Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
+            `NONE`, this has the same shape as `losses`; otherwise, it is scalar.
+    """
+    Reduction.validate(reduction)
+    if sample_weight is None:
+        sample_weight = 1.0  # no weights given: fall back to a scalar weight of 1
+    with K.name_scope(name or 'weighted_loss'):
+        input_dtype = K.dtype(losses)
+        losses = K.cast(losses, K.floatx())
+        sample_weight = K.cast(sample_weight, K.floatx())
+
+        # Update dimensions of `sample_weight` to match with `losses` if possible.
+        losses, _, sample_weight = squeeze_or_expand_dimensions(
+            losses, None, sample_weight)
+
+        weighted_losses = losses * sample_weight
+        # Apply reduction function to the individual weighted losses.
+        loss = reduce_weighted_loss(weighted_losses, reduction)
+        # Convert the result back to the input type.
+        loss = K.cast(loss, input_dtype)
+        return loss
diff --git a/tests/keras/losses_test.py b/tests/keras/losses_test.py