Skip to content

Commit fc9769f

Browse files
author
huergasi
committed
refactoring: h(t) now depends on h(t-1) and h(t-2) (two recurrent hidden-state taps)
1 parent ac1dcee commit fc9769f

1 file changed

Lines changed: 30 additions & 32 deletions

File tree

scripts/rnn.py

Lines changed: 30 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
class RNN(object):
1212

1313
def __init__(self, n_inputs, n_hidden, n_output,
14-
activation=T.tanh, L1_reg=0.0001, L2_reg=0.0001,):
14+
activation=T.tanh, L1_reg=0.0001, L2_reg=0.0001,
15+
window=2):
1516

1617
'''
1718
@@ -24,21 +25,21 @@ def __init__(self, n_inputs, n_hidden, n_output,
2425
# parameters
2526
self.L1_reg = L1_reg
2627
self.L2_reg = L2_reg
28+
self.wSize = window
2729

28-
# theta_tm1, weight matrix from input to hidden units
29-
self.theta_tm1 = theano.shared(name='theta_tm1',
30+
# theta1, weight matrix from input to hidden units
31+
self.theta1 = theano.shared(name='theta1',
3032
value=0.2 * np.random.uniform(-1.0, 1.0,
3133
(n_inputs, n_hidden))
3234
.astype(theano.config.floatX))
3335

34-
# theta1, weight matrix from input to hidden units
35-
self.theta1 = theano.shared(name='theta1',
36+
# thetah1, recurrent weight matrix (hidden to hidden, tap t-2)
37+
self.thetah1 = theano.shared(name='thetah1',
3638
value=0.2 * np.random.uniform(-1.0, 1.0,
37-
(n_inputs + (n_hidden * 1), n_hidden))
39+
(n_hidden, n_hidden))
3840
.astype(theano.config.floatX))
3941

40-
# thetah, recurrent weights matrix (hidden to hidden)
41-
self.thetah = theano.shared(name='thetah',
42+
self.thetah2 = theano.shared(name='thetah2',
4243
value=0.2 * np.random.uniform(-1.0, 1.0,
4344
(n_hidden, n_hidden))
4445
.astype(theano.config.floatX))
@@ -61,11 +62,12 @@ def __init__(self, n_inputs, n_hidden, n_output,
6162

6263
# h0, hidden states
6364
self.h0 = theano.shared(name='h0',
64-
value=np.zeros(n_hidden,
65+
value=np.zeros((2,n_hidden),
6566
dtype=theano.config.floatX))
6667

6768
# all the parameters
68-
self.params = [self.theta1, self.theta_tm1, self.thetah, self.theta2, self.bh, self.bout, self.h0]
69+
self.params = [self.theta1, self.thetah1, self.thetah2,
70+
self.theta2, self.bh, self.bout, self.h0]
6971

7072
# activation function
7173
self.activation = activation
@@ -75,40 +77,37 @@ def __init__(self, n_inputs, n_hidden, n_output,
7577
y = T.dmatrix('y')
7678

7779
# forward pass with recurrence over h(t-1) and h(t-2) (two hidden-state taps)
78-
def forward_pass(x_tm1, x_t, h_tm1):
80+
def forward_pass(x_t, h_tm2, h_tm1):
7981

80-
# hidden states as t-1
81-
h_x_tm1 = self.activation(T.dot(x_tm1, self.theta_tm1));
82-
83-
# hidden states as t
84-
new_x_t = T.concatenate([x_t, h_x_tm1])
85-
h_t = self.activation(T.dot(new_x_t, self.theta1) +
86-
T.dot(h_tm1, self.thetah) + self.bh)
82+
h_t = self.activation(T.dot(x_t, self.theta1) +
83+
(T.dot(h_tm2, self.thetah1) + self.bh) +
84+
(T.dot(h_tm1, self.thetah2) + self.bh)
85+
)
8786

8887
# output at t
8988
y_t = T.dot(h_t, self.theta2) + self.bout
9089

9190
return h_t, y_t
9291

9392
[h, y_pred], _ = theano.scan(fn=forward_pass,
94-
sequences=dict(input=x, taps=[-1,-0]),
95-
outputs_info=[self.h0, None],
96-
n_steps=x.shape[0]-1)
93+
sequences=x,
94+
outputs_info=[dict(initial=self.h0, taps=[-2,-1]), None],
95+
n_steps=x.shape[0])
9796

9897
# let's use L1 and L2 regularization
9998

10099
# L1 regularization
101100
self.L1 = 0
102-
self.L1 += abs(self.thetah.sum())
103-
self.L1 += abs(self.theta_tm1.sum())
101+
self.L1 += abs(self.thetah1.sum())
102+
self.L1 += abs(self.thetah2.sum())
104103
self.L1 += abs(self.theta1.sum())
105104
self.L1 += abs(self.theta2.sum())
106105

107106
# square of L2 norm ; one regularization option is to enforce
108107
# square of L2 norm to be small
109108
self.L2_sqr = 0
110-
self.L2_sqr += (self.thetah ** 2).sum()
111-
self.L2_sqr += (self.theta_tm1 ** 2).sum()
109+
self.L2_sqr += (self.thetah1 ** 2).sum()
110+
self.L2_sqr += (self.thetah2 ** 2).sum()
112111
self.L2_sqr += (self.theta1 ** 2).sum()
113112
self.L2_sqr += (self.theta2 ** 2).sum()
114113

@@ -139,9 +138,8 @@ def forward_pass(x_tm1, x_t, h_tm1):
139138

140139

141140
def train(self, x, y, learning_rate):
142-
new_y = y[1:]
143-
self.batch_train(x, new_y, learning_rate)
144-
self.batch_train_print(x, new_y, learning_rate)
141+
self.batch_train(x, y, learning_rate)
142+
self.batch_train_print(x, y, learning_rate)
145143

146144
def mse(self, y):
147145
# error between output and target
@@ -156,14 +154,14 @@ def main(params=None):
156154
'decay' : True,
157155
'n_hidden' : 50,
158156
'seed' : 234,
159-
'epochs' : 1
157+
'epochs' : 2
160158
}
161159

162160
print params
163161

164162
training_data, validation_data, testing_data = p.getdata()
165163
training_data_x, training_data_y = training_data
166-
164+
testing_data_x, testing_data_y = testing_data
167165
np.random.seed(params['seed'])
168166
random.seed(params['seed'])
169167

@@ -176,9 +174,9 @@ def main(params=None):
176174
total_cost = 0;
177175
for batch_index in xrange(training_data_x.__len__()):
178176
rnn.train(training_data_x[batch_index], training_data_y[batch_index], params['lr'])
179-
train_loss = [rnn.compute_training_error(training_data_x[i], training_data_y[i][1:]) for i in xrange(training_data_x.__len__())]
177+
train_loss = [rnn.compute_training_error(testing_data_x[i], testing_data_y[i]) for i in xrange(testing_data_x.__len__())]
180178
total_training_loss = np.mean(train_loss)
181-
print 'epoch %i with average cost %f' % (epoch, total_training_loss)
179+
print 'epoch %i with test mse %f' % (epoch, total_training_loss)
182180

183181
if __name__ == '__main__':
184182
main()

0 commit comments

Comments
 (0)