package DeepLearning;

import java.util.Random;
import java.util.List;
import java.util.ArrayList;

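// A feed-forward classifier: a stack of HiddenLayers topped by a
// LogisticRegression output layer, trained with dropout regularization.
// During training each hidden activation is randomly zeroed with
// probability p_dropout to reduce co-adaptation of units; at test time
// pretest() rescales the weights by (1 - p_dropout) to compensate.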
public class Dropout {
    public int N;                     // number of training examples
    public int n_in;                  // dimensionality of the input
    public int[] hidden_layer_sizes;  // number of units in each hidden layer
    public int n_out;                 // number of output classes
    public int n_layers;              // number of hidden layers
    public HiddenLayer[] hiddenLayers;
    public LogisticRegression logisticLayer;
    public Random rng;


    public Dropout(int N, int n_in, int[] hidden_layer_sizes, int n_out, Random rng, String activation) {
        this.N = N;
        this.n_in = n_in;
        this.hidden_layer_sizes = hidden_layer_sizes;
        this.n_layers = hidden_layer_sizes.length;
        this.n_out = n_out;

        this.hiddenLayers = new HiddenLayer[n_layers];

        if (rng == null) rng = new Random(1234);
        this.rng = rng;

        if (activation == null) activation = "ReLU";

        // construct the hidden layers: layer i reads the previous layer's
        // output (the raw input, for i == 0)
        int input_size;
        for(int i=0; i<this.n_layers; i++) {
            if(i == 0) {
                input_size = n_in;
            } else {
                input_size = hidden_layer_sizes[i-1];
            }

            this.hiddenLayers[i] = new HiddenLayer(N, input_size, hidden_layer_sizes[i], null, null, rng, activation);
        }

        // construct the logistic output layer on top of the last hidden layer
        this.logisticLayer = new LogisticRegression(N, hidden_layer_sizes[this.n_layers-1], n_out);
    }

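    // Plain per-example SGD: for each training example, run a forward pass
    // through the hidden stack (drawing a fresh dropout mask per layer when
    // dropout is enabled), train the logistic output layer on the result,
    // then backpropagate its delta down through the hidden layers.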
    public void train(int epochs, double[][] train_X, int[][] train_Y, boolean dropout, double p_dropout, double lr) {
        List<int[]> dropout_masks;
        List<double[]> layer_inputs;
        double[] layer_input;
        double[] layer_output = new double[0];  // reassigned before first use

        for(int epoch=0; epoch<epochs; epoch++) {

            for(int n=0; n<N; n++) {

                dropout_masks = new ArrayList<>(n_layers);
                layer_inputs = new ArrayList<>(n_layers+1);  // +1 for the logistic layer's input

                // forward hiddenLayers, caching each layer's input for backprop
                for(int i=0; i<n_layers; i++) {

                    if(i == 0) layer_input = train_X[n];
                    else layer_input = layer_output.clone();

                    layer_inputs.add(layer_input.clone());

                    layer_output = new double[hidden_layer_sizes[i]];
                    hiddenLayers[i].forward(layer_input, layer_output);

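                    // Dropout: zero each activation with probability p_dropout
                    // and remember the mask for the backward pass.
                    // HiddenLayer.dropout is assumed to return a binary mask
                    // whose entries are 1 with probability 1 - p_dropout,
                    // along the lines of this sketch:
                    //
                    //   public int[] dropout(int size, double p, Random rng) {
                    //       int[] mask = new int[size];
                    //       for (int i = 0; i < size; i++) {
                    //           mask[i] = rng.nextDouble() >= p ? 1 : 0;  // keep with prob 1 - p
                    //       }
                    //       return mask;
                    //   }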
                    if(dropout) {
                        int[] mask = hiddenLayers[i].dropout(layer_output.length, p_dropout, rng);
                        for(int j=0; j<layer_output.length; j++) layer_output[j] *= mask[j];

                        dropout_masks.add(mask);
                    }

                }

                // forward & backward logisticLayer: train() updates the output
                // layer and returns its delta
                double[] logistic_layer_dy = logisticLayer.train(layer_output, train_Y[n], lr);
                layer_inputs.add(layer_output.clone());

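                // backward hiddenLayers: each layer's delta dy is computed
                // from the delta and weights of the layer above (the logistic
                // layer, for the topmost hidden layer)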
                double[] prev_dy = logistic_layer_dy;
                double[][] prev_W;
                double[] dy = new double[0];  // reassigned before first use

                for(int i=n_layers-1; i>=0; i--) {

                    if(i == n_layers-1) {
                        prev_W = logisticLayer.W;
                    } else {
                        prev_dy = dy.clone();
                        prev_W = hiddenLayers[i+1].W;
                    }

                    dy = new double[hidden_layer_sizes[i]];
                    hiddenLayers[i].backward(layer_inputs.get(i), dy, layer_inputs.get(i+1), prev_dy, prev_W, lr);

                    // zero the deltas of dropped units so they pass no gradient
                    // to the layer below
                    if(dropout) {
                        for(int j=0; j<dy.length; j++) {
                            dy[j] *= dropout_masks.get(i)[j];
                        }
                    }
                }

            }
        }
    }

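    // Prepare the network for testing after training with dropout. During
    // training each hidden activation h_j reaches the next layer as
    // mask_j * h_j, with expectation (1 - p_dropout) * h_j; rescaling the
    // receiving layer's weights by (1 - p_dropout) makes the deterministic
    // test-time forward pass match that expectation. Dropout was applied to
    // the *output* of every hidden layer, i.e. to the *input* of hidden
    // layers 1..n_layers-1 and of the logistic layer, so those are the
    // weights rescaled here. (The common alternative, "inverted dropout",
    // divides activations by 1 - p_dropout during training and needs no
    // rescaling at test time.)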
    public void pretest(double p_dropout) {
        // hiddenLayers[i].W has shape [hidden_layer_sizes[i]][hidden_layer_sizes[i-1]];
        // hidden layer 0 is skipped because its input (the raw x) was never dropped
        for(int i=1; i<n_layers; i++) {
            for(int l=0; l<hidden_layer_sizes[i]; l++) {
                for(int m=0; m<hidden_layer_sizes[i-1]; m++) {
                    hiddenLayers[i].W[l][m] *= 1 - p_dropout;
                }
            }
        }

        // logisticLayer.W has shape [n_out][hidden_layer_sizes[n_layers-1]]
        for(int l=0; l<n_out; l++) {
            for(int m=0; m<hidden_layer_sizes[n_layers-1]; m++) {
                logisticLayer.W[l][m] *= 1 - p_dropout;
            }
        }
    }


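    // Deterministic forward pass for inference: no dropout masks are drawn.
    // If the net was trained with dropout, call pretest() once before
    // predicting so the weights are rescaled.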
    public void predict(double[] x, double[] y) {
        double[] layer_input;
        double[] layer_output = new double[0];  // reassigned before first use

        for(int i=0; i<n_layers; i++) {

            if(i == 0) layer_input = x;
            else layer_input = layer_output.clone();

            layer_output = new double[hidden_layer_sizes[i]];
            hiddenLayers[i].forward(layer_input, layer_output);
        }

        logisticLayer.predict(layer_output, y);
    }


    private static void test_dropout() {
        Random rng = new Random(123);

        double learning_rate = 0.1;
        int n_epochs = 5000;

        int train_N = 4;
        int test_N = 4;
        int n_in = 2;
        int[] hidden_layer_sizes = {10, 10};
        int n_out = 2;

        boolean dropout = true;
        double p_dropout = 0.5;

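        // Toy XOR problem with one-hot labels: class {1, 0} when exactly one
        // input is 1, class {0, 1} otherwise.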
        double[][] train_X = {
            {0., 0.},
            {0., 1.},
            {1., 0.},
            {1., 1.},
        };

        int[][] train_Y = {
            {0, 1},
            {1, 0},
            {1, 0},
            {0, 1},
        };

        // construct Dropout
        Dropout classifier = new Dropout(train_N, n_in, hidden_layer_sizes, n_out, rng, "ReLU");

        // train
        classifier.train(n_epochs, train_X, train_Y, dropout, p_dropout, learning_rate);

        // rescale weights for test time (only needed when dropout was used)
        if(dropout) classifier.pretest(p_dropout);


        // test data
        double[][] test_X = {
            {0., 0.},
            {0., 1.},
            {1., 0.},
            {1., 1.},
        };

        double[][] test_Y = new double[test_N][n_out];

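        // Each printed row is the predicted class distribution for one test
        // input; with enough epochs most of the mass should land on the
        // correct XOR class (exact values vary with the RNG seed).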
        // test
        for(int i=0; i<test_N; i++) {
            classifier.predict(test_X[i], test_Y[i]);
            for(int j=0; j<n_out; j++) {
                System.out.print(test_Y[i][j] + " ");
            }
            System.out.println();
        }

    }


    public static void main(String[] args) {
        test_dropout();
    }
}