
Commit f2d82e1

Added support for alternate activation functions
1 parent a7c9629 commit f2d82e1

File tree

README.md
cnn_text_classification.py

2 files changed: +32 −24 lines

README.md

Lines changed: 13 additions & 10 deletions

@@ -10,7 +10,7 @@ Fork of Shawn Ng's [CNNs for Sentence Classification in PyTorch](https://github.
 
 ## Known Issues
 * The predict method is probably not as efficient as it could be.
-* Doesn't play well with GridSearchCV if num_jobs isn't 1.
+* Doesn't play well with GridSearchCV if num_jobs isn't 1 (unless not using CUDA).
 * Only supports pre-trained word vectors from TorchText.
 * The random_state parameter probably only works with integers or None.
 * Training samples shorter than the maximum kernel size are ignored.
@@ -64,23 +64,26 @@ Fork of Shawn Ng's [CNNs for Sentence Classification in PyTorch](https://github.
 **cuda : boolean, optional (default=True)**
 If true, use the GPU if available.
 
-**class_weight : dict, "balanced" or None, optional (default=None)**
-Weights associated with each class (see class_weight parameter in existing scikit-learn classifiers).
-
-**split_ratio : float, optional (default=0.9)**
-Ratio of training data used for training. The remainder will be used for validation.
+**activation_func : string, optional (default='relu')**
+Activation function. If 'relu' or 'tanh', uses rectified linear unit or hyperbolic tangent, respectively. Otherwise, uses no activation function (f(x) = x).
 
-**random_state : integer, optional (default=None)**
-Seed for the random number generator.
+**scoring : callable or None, optional (default=sklearn.metrics.accuracy_score)**
+Scoring method for testing model performance during fitting.
 
 **vectors : string, optional (default=None)**
 Which pretrained TorchText vectors to use (see [torchtext.vocab.pretrained_aliases](https://torchtext.readthedocs.io/en/latest/vocab.html#pretrained-aliases) for options).
 
+**split_ratio : float, optional (default=0.9)**
+Ratio of training data used for training. The remainder will be used for validation.
+
 **preprocessor : callable or None, optional (default=None)**
 Override default string preprocessing.
 
-**scoring : callable or None, optional (default=sklearn.metrics.accuracy_score)**
-Scoring method for testing model performance during fitting.
+**class_weight : dict, "balanced" or None, optional (default=None)**
+Weights associated with each class (see class_weight parameter in existing scikit-learn classifiers).
+
+**random_state : integer, optional (default=None)**
+Seed for the random number generator.
 
 **verbose : integer, optional (default=0)**
 Controls the verbosity when fitting.
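
For reference, a minimal usage sketch of the classifier with the new parameter. The import path, the toy training sentences, and the hyperparameter values below are illustrative assumptions, not part of the commit; only the CNNClassifier name and its keyword arguments come from the diff above.

# Hypothetical example: fit CNNClassifier with the new activation_func option.
# The data and settings are made up; any string other than 'relu' or 'tanh'
# falls back to the identity activation, f(x) = x.
from cnn_text_classification import CNNClassifier

# Samples are kept at >= 5 tokens because training samples shorter than the
# maximum kernel size (default "3,4,5") are ignored.
X_train = ["a thoroughly enjoyable and well acted movie",
           "wonderful performances and a clever script overall",
           "a dull boring and badly paced film",
           "poorly written with wooden acting throughout"]
y_train = [1, 1, 0, 0]

# split_ratio=0.5 just keeps the tiny toy validation split non-empty.
clf = CNNClassifier(activation_func="tanh", epochs=4, batch_size=2,
                    cuda=False, split_ratio=0.5, random_state=0, verbose=1)
clf.fit(X_train, y_train)
print(clf.predict(["a clever and enjoyable film overall"]))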

cnn_text_classification.py

Lines changed: 19 additions & 14 deletions

@@ -15,10 +15,10 @@ class CNNClassifier(BaseEstimator, ClassifierMixin):
     def __init__(self, lr=0.001, epochs=256, batch_size=64, test_interval=100,
                  early_stop=1000, save_best=True, dropout=0.5, max_norm=0.0,
                  embed_dim=128, kernel_num=100, kernel_sizes="3,4,5",
-                 static=False, device=-1, cuda=True, class_weight=None,
-                 split_ratio=0.9, random_state=None, vectors=None,
-                 preprocessor=None, scoring=make_scorer(accuracy_score),
-                 verbose=0):
+                 static=False, device=-1, cuda=True, activation_func="relu",
+                 scoring=make_scorer(accuracy_score), vectors=None,
+                 split_ratio=0.9, preprocessor=None, class_weight=None,
+                 random_state=None, verbose=0):
         self.lr = lr
         self.epochs = epochs
         self.batch_size = batch_size
@@ -33,12 +33,13 @@ def __init__(self, lr=0.001, epochs=256, batch_size=64, test_interval=100,
         self.static = static
         self.device = device
         self.cuda = cuda
-        self.class_weight = class_weight
-        self.split_ratio = split_ratio
-        self.random_state = random_state
+        self.activation_func = activation_func
+        self.scoring = scoring
         self.vectors = vectors
+        self.split_ratio = split_ratio
         self.preprocessor = preprocessor
-        self.scoring = scoring
+        self.class_weight = class_weight
+        self.random_state = random_state
         self.verbose = verbose
 
     def __clean_str(self, string):
@@ -100,7 +101,8 @@ def fit(self, X, y, sample_weight=None):
         kernel_sizes = [int(k) for k in self.kernel_sizes.split(",")]
         self.__model = _CNNText(embed_num, self.embed_dim, class_num,
                                 self.kernel_num, kernel_sizes, self.dropout,
-                                self.static, self.__text_field.vocab.vectors)
+                                self.static, self.activation_func,
+                                vectors=self.__text_field.vocab.vectors)
 
         if self.cuda and torch.cuda.is_available():
             torch.cuda.set_device(self.device)
@@ -257,7 +259,7 @@ def __print_elapsed_time(self, seconds):
 
 class _CNNText(nn.Module):
     def __init__(self, embed_num, embed_dim, class_num, kernel_num,
-                 kernel_sizes, dropout, static, vectors=None):
+                 kernel_sizes, dropout, static, activation_func, vectors=None):
         super(_CNNText, self).__init__()
 
         self.__embed = nn.Embedding(embed_num, embed_dim)
@@ -272,13 +274,16 @@ def __init__(self, embed_num, embed_dim, class_num, kernel_num,
         self.__fc1 = nn.Linear(len(Ks) * kernel_num, class_num)
         self.__static = static
 
-    def conv_and_pool(self, x, conv):
-        x = F.relu(conv(x)).squeeze(3)
-        return F.max_pool1d(x, x.size(2)).squeeze(2)
+        if activation_func == "relu":
+            self.__f = F.relu
+        elif activation_func == "tanh":
+            self.__f = torch.tanh
+        else:
+            self.__f = lambda x: x
 
     def forward(self, x):
         x = Variable(self.__embed(x)) if self.__static else self.__embed(x)
-        x = [F.relu(conv(x.unsqueeze(1))).squeeze(3) for conv in self.__convs1]
+        x = [self.__f(cnv(x.unsqueeze(1))).squeeze(3) for cnv in self.__convs1]
         x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
         return self.__fc1(self.__dropout(torch.cat(x, 1)))
 
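
As a standalone sketch of the dispatch this commit introduces inside _CNNText.__init__ (the pick_activation helper name below is made up for illustration; the real code stores the chosen callable on self.__f):

import torch
import torch.nn.functional as F

def pick_activation(activation_func):
    # Same mapping as the commit: 'relu' -> F.relu, 'tanh' -> torch.tanh,
    # anything else -> identity, i.e. f(x) = x.
    if activation_func == "relu":
        return F.relu
    elif activation_func == "tanh":
        return torch.tanh
    return lambda x: x

x = torch.randn(2, 3)
print(pick_activation("relu")(x))    # negative entries clamped to zero
print(pick_activation("tanh")(x))    # values squashed into (-1, 1)
print(pick_activation("linear")(x))  # tensor returned unchanged

Resolving the string to a callable once in __init__ keeps forward() free of per-batch string comparisons; the convolution outputs are simply passed through whatever function was selected.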
