
Commit 0911fa0

MLP.java, Dropout.java
1 parent 8872adf commit 0911fa0

10 files changed (+640, -39 lines)

java/src/DeepLearning/DBN.java

Lines changed: 6 additions & 6 deletions

@@ -9,9 +9,9 @@ public class DBN {
     public int[] hidden_layer_sizes;
     public int n_outs;
     public int n_layers;
-    public HiddenLayer[] sigmoid_layers;
+    public HiddenLayerDiscrete[] sigmoid_layers;
     public RBM[] rbm_layers;
-    public LogisticRegression log_layer;
+    public LogisticRegressionDiscrete log_layer;
     public Random rng;
 
 
@@ -24,7 +24,7 @@ public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers,
         this.n_outs = n_outs;
         this.n_layers = n_layers;
 
-        this.sigmoid_layers = new HiddenLayer[n_layers];
+        this.sigmoid_layers = new HiddenLayerDiscrete[n_layers];
         this.rbm_layers = new RBM[n_layers];
 
         if(rng == null) this.rng = new Random(1234);
@@ -39,14 +39,14 @@ public DBN(int N, int n_ins, int[] hidden_layer_sizes, int n_outs, int n_layers,
             }
 
             // construct sigmoid_layer
-            this.sigmoid_layers[i] = new HiddenLayer(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);
+            this.sigmoid_layers[i] = new HiddenLayerDiscrete(this.N, input_size, this.hidden_layer_sizes[i], null, null, rng);
 
             // construct rbm_layer
             this.rbm_layers[i] = new RBM(this.N, input_size, this.hidden_layer_sizes[i], this.sigmoid_layers[i].W, this.sigmoid_layers[i].b, null, rng);
         }
 
-        // layer for output using DNN.LogisticRegression
-        this.log_layer = new LogisticRegression(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
+        // layer for output using Logistic Regression
+        this.log_layer = new LogisticRegressionDiscrete(this.N, this.hidden_layer_sizes[this.n_layers-1], this.n_outs);
     }
 
     public void pretrain(int[][] train_X, double lr, int k, int epochs) {
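
Context for this change: the same commit reworks HiddenLayer to operate on double[] activations (see the HiddenLayer.java diff below), while DBN pre-training feeds binary int[][] samples through its sigmoid layers, so the int[]-based behaviour moves into the HiddenLayerDiscrete and LogisticRegressionDiscrete classes referenced above. Neither file is shown in this excerpt; purely as a hypothetical sketch (the subclassing and constructor here are assumptions, not confirmed by the diff), HiddenLayerDiscrete could be as small as:

    package DeepLearning;

    import java.util.Random;
    import static DeepLearning.utils.*;

    // Hypothetical sketch only -- this file is not shown in the commit. It
    // re-adds the int[]-based output and sampling that HiddenLayer.java drops
    // below, delegating everything else to HiddenLayer.
    public class HiddenLayerDiscrete extends HiddenLayer {
        public HiddenLayerDiscrete(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
            super(N, n_in, n_out, W, b, rng, "sigmoid");
        }

        public double output(int[] input, double[] w, double b) {
            double linear_output = 0.0;
            for(int j=0; j<n_in; j++) linear_output += w[j] * input[j];
            return sigmoid(linear_output + b);   // sigmoid from DeepLearning.utils
        }

        public void sample_h_given_v(int[] input, int[] sample) {
            for(int i=0; i<n_out; i++) {
                sample[i] = binomial(1, output(input, W[i], b[i]), rng);   // binomial from utils
            }
        }
    }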

java/src/DeepLearning/Dropout.java (new file)

Lines changed: 225 additions & 0 deletions

package DeepLearning;

import java.util.Random;
import java.util.List;
import java.util.ArrayList;

public class Dropout {
    public int N;
    public int n_in;
    public int[] hidden_layer_sizes;
    public int n_out;
    public int n_layers;
    public HiddenLayer[] hiddenLayers;
    public LogisticRegression logisticLayer;
    public Random rng;


    public Dropout(int N, int n_in, int[] hidden_layer_sizes, int n_out, Random rng, String activation) {
        this.N = N;
        this.n_in = n_in;
        this.hidden_layer_sizes = hidden_layer_sizes;
        this.n_layers = hidden_layer_sizes.length;
        this.n_out = n_out;

        this.hiddenLayers = new HiddenLayer[n_layers];

        if (rng == null) rng = new Random(1234);
        this.rng = rng;

        if (activation == null) activation = "ReLU";

        // construct multi-layer
        int input_size;
        for(int i=0; i<this.n_layers; i++) {
            // layer_size
            if(i == 0) {
                input_size = n_in;
            } else {
                input_size = hidden_layer_sizes[i-1];
            }

            // construct hiddenLayer
            this.hiddenLayers[i] = new HiddenLayer(N, input_size, hidden_layer_sizes[i], null, null, rng, activation);
        }

        // construct logisticLayer
        this.logisticLayer = new LogisticRegression(N, hidden_layer_sizes[this.n_layers-1], n_out);
    }

    public void train(int epochs, double[][] train_X, int[][] train_Y, boolean dropout, double p_dropout, double lr) {
        List<int[]> dropout_masks;
        List<double[]> layer_inputs;
        double[] layer_input;
        double[] layer_output = new double[0];

        for(int epoch=0; epoch<epochs; epoch++) {

            for(int n=0; n<N; n++) {

                dropout_masks = new ArrayList<>(n_layers);
                layer_inputs = new ArrayList<>(n_layers+1);  // +1 for logistic layer

                // forward hiddenLayers
                for(int i=0; i<n_layers; i++) {

                    if(i == 0) layer_input = train_X[n];
                    else layer_input = layer_output.clone();

                    layer_inputs.add(layer_input.clone());

                    layer_output = new double[hidden_layer_sizes[i]];
                    hiddenLayers[i].forward(layer_input, layer_output);

                    if(dropout) {
                        int[] mask;
                        mask = hiddenLayers[i].dropout(layer_output.length, p_dropout, rng);
                        for(int j=0; j<layer_output.length; j++) layer_output[j] *= mask[j];

                        dropout_masks.add(mask.clone());
                    }
                }

                // forward & backward logisticLayer
                double[] logistic_layer_dy; // = new double[n_out];
                logistic_layer_dy = logisticLayer.train(layer_output, train_Y[n], lr); //, logistic_layer_dy);
                layer_inputs.add(layer_output.clone());

                // backward hiddenLayers
                double[] prev_dy = logistic_layer_dy;
                double[][] prev_W;
                double[] dy = new double[0];

                for(int i=n_layers-1; i>=0; i--) {

                    if(i == n_layers-1) {
                        prev_W = logisticLayer.W;
                    } else {
                        prev_dy = dy.clone();
                        prev_W = hiddenLayers[i+1].W;
                    }

                    dy = new double[hidden_layer_sizes[i]];
                    hiddenLayers[i].backward(layer_inputs.get(i), dy, layer_inputs.get(i+1), prev_dy, prev_W, lr);

                    if(dropout) {
                        for(int j=0; j<dy.length; j++) {
                            dy[j] *= dropout_masks.get(i)[j];
                        }
                    }
                }
            }
        }
    }


    public void pretest(double p_dropout) {
        for(int i=0; i<n_layers; i++) {
            int in;
            int out;

            if (i == 0) in = n_in;
            else in = hidden_layer_sizes[i];

            if (i == n_layers - 1) out = n_out;
            else out = hidden_layer_sizes[i+1];

            for (int l = 0; l < out; l++) {
                for (int m = 0; m < in; m++) {
                    hiddenLayers[i].W[l][m] *= 1 - p_dropout;
                }
            }
        }
    }


    public void predict(double[] x, double[] y) {
        double[] layer_input;
        double[] layer_output = new double[0];

        for(int i=0; i<n_layers; i++) {

            if(i == 0) layer_input = x;
            else layer_input = layer_output.clone();

            layer_output = new double[hidden_layer_sizes[i]];

            hiddenLayers[i].forward(layer_input, layer_output);
        }

        logisticLayer.predict(layer_output, y);
    }


    private static void test_dropout() {
        Random rng = new Random(123);

        double learning_rate = 0.1;
        int n_epochs = 5000;

        int train_N = 4;
        int test_N = 4;
        int n_in = 2;
        int[] hidden_layer_sizes = {10, 10};
        int n_out = 2;

        boolean dropout = true;
        double p_dropout = 0.5;


        double[][] train_X = {
            {0., 0.},
            {0., 1.},
            {1., 0.},
            {1., 1.},
        };

        int[][] train_Y = {
            {0, 1},
            {1, 0},
            {1, 0},
            {0, 1},
        };

        // construct Dropout
        Dropout classifier = new Dropout(train_N, n_in, hidden_layer_sizes, n_out, rng, "ReLU");

        // train
        classifier.train(n_epochs, train_X, train_Y, dropout, p_dropout, learning_rate);

        // pretest
        if(dropout) classifier.pretest(p_dropout);


        // test data
        double[][] test_X = {
            {0., 0.},
            {0., 1.},
            {1., 0.},
            {1., 1.},
        };

        double[][] test_Y = new double[test_N][n_out];

        // test
        for(int i=0; i<test_N; i++) {
            classifier.predict(test_X[i], test_Y[i]);
            for(int j=0; j<n_out; j++) {
                System.out.print(test_Y[i][j] + " ");
            }
            System.out.println();
        }
    }


    public static void main(String[] args) {
        test_dropout();
    }
}
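
A note on the mask convention before moving on: HiddenLayer.dropout (diff below) draws mask[i] = binomial(1, p_dropout, rng), so a unit is kept, not dropped, with probability p_dropout, while pretest() rescales the trained weights by 1 - p_dropout. Since the expected masked activation under keep probability p is E[mask * out] = p * out, the two conventions agree only at p_dropout = 0.5, the value this demo uses. (pretest() also takes its loop bounds from the neighbouring layers' sizes rather than from W's own [hidden_layer_sizes[i]][input_size] shape, and the logistic layer's weights are never rescaled, so reuse with unequal layer sizes deserves care.) A standalone sketch, not part of the commit, that checks the expectation by Monte Carlo:

    import java.util.Random;

    // Standalone check (not part of this commit): averaging Bernoulli dropout
    // masks over many draws reproduces the deterministic test-time rescaling.
    public class DropoutScalingCheck {
        public static void main(String[] args) {
            Random rng = new Random(123);
            double p_dropout = 0.5;   // keep probability under this code's convention
            double out = 0.8;         // some fixed hidden-unit activation

            int trials = 1_000_000;
            double sum = 0.0;
            for (int t = 0; t < trials; t++) {
                int mask = rng.nextDouble() < p_dropout ? 1 : 0;   // binomial(1, p_dropout)
                sum += mask * out;
            }

            System.out.println("Monte Carlo mean of mask*out: " + sum / trials);            // ~0.4
            System.out.println("(1 - p_dropout) * out:        " + (1 - p_dropout) * out);   // 0.4
        }
    }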
java/src/DeepLearning/HiddenLayer.java

Lines changed: 61 additions & 9 deletions

@@ -1,6 +1,7 @@
 package DeepLearning;
 
 import java.util.Random;
+import java.util.function.DoubleFunction;
 import static DeepLearning.utils.*;
 
 public class HiddenLayer {
@@ -10,17 +11,18 @@ public class HiddenLayer {
     public double[][] W;
     public double[] b;
     public Random rng;
+    public DoubleFunction<Double> activation;
+    public DoubleFunction<Double> dactivation;
 
-
-    public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng) {
+    public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random rng, String activation) {
         this.N = N;
         this.n_in = n_in;
         this.n_out = n_out;
 
-        if(rng == null) this.rng = new Random(1234);
+        if (rng == null) this.rng = new Random(1234);
         else this.rng = rng;
 
-        if(W == null) {
+        if (W == null) {
             this.W = new double[n_out][n_in];
             double a = 1.0 / this.n_in;
 
@@ -33,22 +35,72 @@ public HiddenLayer(int N, int n_in, int n_out, double[][] W, double[] b, Random
             this.W = W;
         }
 
-        if(b == null) this.b = new double[n_out];
+        if (b == null) this.b = new double[n_out];
         else this.b = b;
+
+        if (activation == "sigmoid" || activation == null) {
+            this.activation = (double x) -> sigmoid(x);
+            this.dactivation = (double x) -> dsigmoid(x);
+
+        } else if (activation == "tanh") {
+            this.activation = (double x) -> tanh(x);
+            this.dactivation = (double x) -> dtanh(x);
+        } else if (activation == "ReLU") {
+            this.activation = (double x) -> ReLU(x);
+            this.dactivation = (double x) -> dReLU(x);
+        } else {
+            throw new IllegalArgumentException("activation function not supported");
+        }
+
     }
 
-    public double output(int[] input, double[] w, double b) {
+    public double output(double[] input, double[] w, double b) {
         double linear_output = 0.0;
         for(int j=0; j<n_in; j++) {
             linear_output += w[j] * input[j];
         }
         linear_output += b;
-        return sigmoid(linear_output);
+
+        return activation.apply(linear_output);
+    }
+
+
+    public void forward(double[] input, double[] output) {
+        for(int i=0; i<n_out; i++) {
+            output[i] = this.output(input, W[i], b[i]);
+        }
     }
 
-    public void sample_h_given_v(int[] input, int[] sample) {
+    public void backward(double[] input, double[] dy, double[] prev_layer_input, double[] prev_layer_dy, double[][] prev_layer_W, double lr) {
+        if(dy == null) dy = new double[n_out];
+
+        int prev_n_in = n_out;
+        int prev_n_out = prev_layer_dy.length;
+
+        for(int i=0; i<prev_n_in; i++) {
+            dy[i] = 0;
+            for(int j=0; j<prev_n_out; j++) {
+                dy[i] += prev_layer_dy[j] * prev_layer_W[j][i];
+            }
+
+            dy[i] *= dactivation.apply(prev_layer_input[i]);
+        }
+
         for(int i=0; i<n_out; i++) {
-            sample[i] = binomial(1, output(input, W[i], b[i]), rng);
+            for(int j=0; j<n_in; j++) {
+                W[i][j] += lr * dy[i] * input[j] / N;
+            }
+            b[i] += lr * dy[i] / N;
+        }
+    }
+
+    public int[] dropout(int size, double p, Random rng) {
+        int[] mask = new int[size];
+
+        for(int i=0; i<size; i++) {
+            mask[i] = binomial(1, p, rng);
        }
+
+        return mask;
     }
 }
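
The new backward() implements the standard backpropagation step: this layer's delta is dy[i] = dactivation(out_i) * sum_j prev_dy[j] * prev_W[j][i], where out_i is the stored forward output handed in as prev_layer_input (consistent with derivative helpers written in terms of the activated value, e.g. dsigmoid(y) = y * (1 - y)), followed by the update W[i][j] += lr * dy[i] * input[j] / N. One caveat for reuse: the activation strings are compared with ==, which works here only because every caller passes a compile-time literal (interned by the JVM); "ReLU".equals(activation) would be the robust form. A minimal usage sketch of the new constructor, forward(), and dropout() (illustrative values only; assumes this commit's classes are on the classpath):

    package DeepLearning;

    import java.util.Random;

    // Illustrative only, not part of the commit.
    public class HiddenLayerDemo {
        public static void main(String[] args) {
            // 4 training cases, 2 inputs, 3 hidden units, tanh activation
            HiddenLayer h = new HiddenLayer(4, 2, 3, null, null, new Random(123), "tanh");

            double[] x = {1., 0.};
            double[] y = new double[3];
            h.forward(x, y);   // y[i] = tanh(W[i] . x + b[i])

            int[] mask = h.dropout(y.length, 0.5, new Random(123));   // keep each unit w.p. 0.5
            for (int i = 0; i < y.length; i++) y[i] *= mask[i];
        }
    }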
