package DeepLearning;

import java.util.Random;
import java.util.List;
import java.util.ArrayList;

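// A feed-forward classifier: a stack of HiddenLayers topped by a
// LogisticRegression output layer, trained with dropout regularization.
// During training each hidden activation is randomly zeroed with
// probability p_dropout to reduce co-adaptation of units; at test time
// pretest() rescales the weights by (1 - p_dropout) to compensate.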
public class Dropout {
    public int N;                     // number of training examples
    public int n_in;                  // dimensionality of the input
    public int[] hidden_layer_sizes;  // number of units in each hidden layer
    public int n_out;                 // number of output classes
    public int n_layers;              // number of hidden layers
    public HiddenLayer[] hiddenLayers;
    public LogisticRegression logisticLayer;
    public Random rng;


    public Dropout(int N, int n_in, int[] hidden_layer_sizes, int n_out, Random rng, String activation) {
        this.N = N;
        this.n_in = n_in;
        this.hidden_layer_sizes = hidden_layer_sizes;
        this.n_layers = hidden_layer_sizes.length;
        this.n_out = n_out;

        this.hiddenLayers = new HiddenLayer[n_layers];

        if (rng == null) rng = new Random(1234);
        this.rng = rng;

        if (activation == null) activation = "ReLU";

        // construct the hidden layers: layer i reads the previous layer's
        // output (the raw input, for i == 0)
        int input_size;
        for(int i=0; i<this.n_layers; i++) {
            if(i == 0) {
                input_size = n_in;
            } else {
                input_size = hidden_layer_sizes[i-1];
            }

            this.hiddenLayers[i] = new HiddenLayer(N, input_size, hidden_layer_sizes[i], null, null, rng, activation);
        }

        // construct the logistic output layer on top of the last hidden layer
        this.logisticLayer = new LogisticRegression(N, hidden_layer_sizes[this.n_layers-1], n_out);
    }

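    // Plain per-example SGD: for each training example, run a forward pass
    // through the hidden stack (drawing a fresh dropout mask per layer when
    // dropout is enabled), train the logistic output layer on the result,
    // then backpropagate its delta down through the hidden layers.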
    public void train(int epochs, double[][] train_X, int[][] train_Y, boolean dropout, double p_dropout, double lr) {
        List<int[]> dropout_masks;
        List<double[]> layer_inputs;
        double[] layer_input;
        double[] layer_output = new double[0];  // reassigned before first use

        for(int epoch=0; epoch<epochs; epoch++) {

            for(int n=0; n<N; n++) {

                dropout_masks = new ArrayList<>(n_layers);
                layer_inputs = new ArrayList<>(n_layers+1);  // +1 for the logistic layer's input

                // forward hiddenLayers, caching each layer's input for backprop
                for(int i=0; i<n_layers; i++) {

                    if(i == 0) layer_input = train_X[n];
                    else layer_input = layer_output.clone();

                    layer_inputs.add(layer_input.clone());

                    layer_output = new double[hidden_layer_sizes[i]];
                    hiddenLayers[i].forward(layer_input, layer_output);

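                    // Dropout: zero each activation with probability p_dropout
                    // and remember the mask for the backward pass.
                    // HiddenLayer.dropout is assumed to return a binary mask
                    // whose entries are 1 with probability 1 - p_dropout,
                    // along the lines of this sketch:
                    //
                    //   public int[] dropout(int size, double p, Random rng) {
                    //       int[] mask = new int[size];
                    //       for (int i = 0; i < size; i++) {
                    //           mask[i] = rng.nextDouble() >= p ? 1 : 0;  // keep with prob 1 - p
                    //       }
                    //       return mask;
                    //   }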
                    if(dropout) {
                        int[] mask = hiddenLayers[i].dropout(layer_output.length, p_dropout, rng);
                        for(int j=0; j<layer_output.length; j++) layer_output[j] *= mask[j];

                        dropout_masks.add(mask);
                    }

                }

                // forward & backward logisticLayer: train() updates the output
                // layer and returns its delta
                double[] logistic_layer_dy = logisticLayer.train(layer_output, train_Y[n], lr);
                layer_inputs.add(layer_output.clone());

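                // backward hiddenLayers: each layer's delta dy is computed
                // from the delta and weights of the layer above (the logistic
                // layer, for the topmost hidden layer)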
                double[] prev_dy = logistic_layer_dy;
                double[][] prev_W;
                double[] dy = new double[0];  // reassigned before first use

                for(int i=n_layers-1; i>=0; i--) {

                    if(i == n_layers-1) {
                        prev_W = logisticLayer.W;
                    } else {
                        prev_dy = dy.clone();
                        prev_W = hiddenLayers[i+1].W;
                    }

                    dy = new double[hidden_layer_sizes[i]];
                    hiddenLayers[i].backward(layer_inputs.get(i), dy, layer_inputs.get(i+1), prev_dy, prev_W, lr);

                    // zero the deltas of dropped units so they pass no gradient
                    // to the layer below
                    if(dropout) {
                        for(int j=0; j<dy.length; j++) {
                            dy[j] *= dropout_masks.get(i)[j];
                        }
                    }
                }

            }
        }
    }

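    // Prepare the network for testing after training with dropout. During
    // training each hidden activation h_j reaches the next layer as
    // mask_j * h_j, with expectation (1 - p_dropout) * h_j; rescaling the
    // receiving layer's weights by (1 - p_dropout) makes the deterministic
    // test-time forward pass match that expectation. Dropout was applied to
    // the *output* of every hidden layer, i.e. to the *input* of hidden
    // layers 1..n_layers-1 and of the logistic layer, so those are the
    // weights rescaled here. (The common alternative, "inverted dropout",
    // divides activations by 1 - p_dropout during training and needs no
    // rescaling at test time.)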
    public void pretest(double p_dropout) {
        // hiddenLayers[i].W has shape [hidden_layer_sizes[i]][hidden_layer_sizes[i-1]];
        // hidden layer 0 is skipped because its input (the raw x) was never dropped
        for(int i=1; i<n_layers; i++) {
            for(int l=0; l<hidden_layer_sizes[i]; l++) {
                for(int m=0; m<hidden_layer_sizes[i-1]; m++) {
                    hiddenLayers[i].W[l][m] *= 1 - p_dropout;
                }
            }
        }

        // logisticLayer.W has shape [n_out][hidden_layer_sizes[n_layers-1]]
        for(int l=0; l<n_out; l++) {
            for(int m=0; m<hidden_layer_sizes[n_layers-1]; m++) {
                logisticLayer.W[l][m] *= 1 - p_dropout;
            }
        }
    }


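    // Deterministic forward pass for inference: no dropout masks are drawn.
    // If the net was trained with dropout, call pretest() once before
    // predicting so the weights are rescaled.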
    public void predict(double[] x, double[] y) {
        double[] layer_input;
        double[] layer_output = new double[0];  // reassigned before first use

        for(int i=0; i<n_layers; i++) {

            if(i == 0) layer_input = x;
            else layer_input = layer_output.clone();

            layer_output = new double[hidden_layer_sizes[i]];
            hiddenLayers[i].forward(layer_input, layer_output);
        }

        logisticLayer.predict(layer_output, y);
    }


    private static void test_dropout() {
        Random rng = new Random(123);

        double learning_rate = 0.1;
        int n_epochs = 5000;

        int train_N = 4;
        int test_N = 4;
        int n_in = 2;
        int[] hidden_layer_sizes = {10, 10};
        int n_out = 2;

        boolean dropout = true;
        double p_dropout = 0.5;

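        // Toy XOR problem with one-hot labels: class {1, 0} when exactly one
        // input is 1, class {0, 1} otherwise.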
        double[][] train_X = {
            {0., 0.},
            {0., 1.},
            {1., 0.},
            {1., 1.},
        };

        int[][] train_Y = {
            {0, 1},
            {1, 0},
            {1, 0},
            {0, 1},
        };

        // construct Dropout
        Dropout classifier = new Dropout(train_N, n_in, hidden_layer_sizes, n_out, rng, "ReLU");

        // train
        classifier.train(n_epochs, train_X, train_Y, dropout, p_dropout, learning_rate);

        // rescale weights for test time (only needed when dropout was used)
        if(dropout) classifier.pretest(p_dropout);


        // test data
        double[][] test_X = {
            {0., 0.},
            {0., 1.},
            {1., 0.},
            {1., 1.},
        };

        double[][] test_Y = new double[test_N][n_out];

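        // Each printed row is the predicted class distribution for one test
        // input; with enough epochs most of the mass should land on the
        // correct XOR class (exact values vary with the RNG seed).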
        // test
        for(int i=0; i<test_N; i++) {
            classifier.predict(test_X[i], test_Y[i]);
            for(int j=0; j<n_out; j++) {
                System.out.print(test_Y[i][j] + " ");
            }
            System.out.println();
        }

    }


    public static void main(String[] args) {
        test_dropout();
    }
}