Skip to content

Commit ac74418

Browse files
Change all ReLU activations to ELU and remove dropout; this was found to improve stability during training.
1 parent 6836002 commit ac74418

File tree

1 file changed

+12
-10
lines changed

1 file changed

+12
-10
lines changed

model.py

Lines changed: 12 additions & 10 deletions
Original file line number · Diff line number · Diff line change
@@ -20,6 +20,8 @@
2020
from __future__ import print_function
2121

2222

23+
import tensorflow as tf
24+
2325
from tensorflow.python.keras.models import Model
2426
from tensorflow.python.keras import layers
2527
from tensorflow.python.keras.layers import Input
@@ -71,17 +73,17 @@ def SepConv_BN(x, filters, prefix, stride=1, kernel_size=3, rate=1, depth_activa
7173
depth_padding = 'valid'
7274

7375
if not depth_activation:
74-
x = Activation('relu')(x)
76+
x = Activation('elu')(x)
7577
x = DepthwiseConv2D((kernel_size, kernel_size), strides=(stride, stride), dilation_rate=(rate, rate),
7678
padding=depth_padding, use_bias=False, name=prefix + '_depthwise')(x)
7779
x = BatchNormalization(name=prefix + '_depthwise_BN', epsilon=epsilon)(x)
7880
if depth_activation:
79-
x = Activation('relu')(x)
81+
x = Activation('elu')(x)
8082
x = Conv2D(filters, (1, 1), padding='same',
8183
use_bias=False, name=prefix + '_pointwise')(x)
8284
x = BatchNormalization(name=prefix + '_pointwise_BN', epsilon=epsilon)(x)
8385
if depth_activation:
84-
x = Activation('relu')(x)
86+
x = Activation('elu')(x)
8587

8688
return x
8789

@@ -273,11 +275,11 @@ def Deeplabv3(weights='pascal_voc', input_tensor=None, input_shape=(512, 512, 3)
273275
x = Conv2D(32, (3, 3), strides=(2, 2),
274276
name='entry_flow_conv1_1', use_bias=False, padding='same')(img_input)
275277
x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
276-
x = Activation('relu')(x)
278+
x = Activation('elu')(x)
277279

278280
x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
279281
x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
280-
x = Activation('relu')(x)
282+
x = Activation('elu')(x)
281283

282284
x = _xception_block(x, [128, 128, 128], 'entry_flow_block1',
283285
skip_connection_type='conv', stride=2,
@@ -366,14 +368,14 @@ def Deeplabv3(weights='pascal_voc', input_tensor=None, input_shape=(512, 512, 3)
366368
b4 = Conv2D(256, (1, 1), padding='same',
367369
use_bias=False, name='image_pooling')(b4)
368370
b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
369-
b4 = Activation('relu')(b4)
371+
b4 = Activation('elu')(b4)
370372
# upsample. have to use compat because of the option align_corners
371373
size_before = K.int_shape(x)
372374
b4 = UpSampling2D(size=(size_before[1],size_before[2]),interpolation='bilinear')(b4)
373375
# simple 1x1
374376
b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
375377
b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
376-
b0 = Activation('relu', name='aspp0_activation')(b0)
378+
b0 = Activation('elu', name='aspp0_activation')(b0)
377379

378380
# there are only 2 branches in mobilenetV2. not sure why
379381
if backbone == 'xception':
@@ -395,8 +397,8 @@ def Deeplabv3(weights='pascal_voc', input_tensor=None, input_shape=(512, 512, 3)
395397
x = Conv2D(256, (1, 1), padding='same',
396398
use_bias=False, name='concat_projection')(x)
397399
x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
398-
x = Activation('relu')(x)
399-
x = Dropout(0.1)(x)
400+
x = Activation('elu')(x)
401+
# x = Dropout(0.1)(x)
400402
# DeepLab v.3+ decoder
401403

402404
if backbone == 'xception':
@@ -409,7 +411,7 @@ def Deeplabv3(weights='pascal_voc', input_tensor=None, input_shape=(512, 512, 3)
409411
use_bias=False, name='feature_projection0')(skip1)
410412
dec_skip1 = BatchNormalization(
411413
name='feature_projection0_BN', epsilon=1e-5)(dec_skip1)
412-
dec_skip1 = Activation('relu')(dec_skip1)
414+
dec_skip1 = Activation('elu')(dec_skip1)
413415
x = Concatenate()([x, dec_skip1])
414416
x = SepConv_BN(x, 256, 'decoder_conv0',
415417
depth_activation=True, epsilon=1e-5)

0 commit comments

Comments (0)