1111class RNN (object ):
1212
1313 def __init__ (self , n_inputs , n_hidden , n_output ,
14- activation = T .tanh , L1_reg = 0.0001 , L2_reg = 0.0001 ,):
14+ activation = T .tanh , L1_reg = 0.0001 , L2_reg = 0.0001 ,
15+ window = 2 ):
1516
1617 '''
1718
@@ -24,21 +25,21 @@ def __init__(self, n_inputs, n_hidden, n_output,
2425 # parameters
2526 self .L1_reg = L1_reg
2627 self .L2_reg = L2_reg
28+ self .wSize = window
2729
28- # theta_tm1 , weight matrix from input to hidden units
29- self .theta_tm1 = theano .shared (name = 'theta_tm1 ' ,
30+ # theta1 , weight matrix from input to hidden units
31+ self .theta1 = theano .shared (name = 'theta1 ' ,
3032 value = 0.2 * np .random .uniform (- 1.0 , 1.0 ,
3133 (n_inputs , n_hidden ))
3234 .astype (theano .config .floatX ))
3335
34- # theta1, weight matrix from input to hidden units
35- self .theta1 = theano .shared (name = 'theta1 ' ,
36+ # thetah, recurrent weights matrix (hidden to hidden)
37+ self .thetah1 = theano .shared (name = 'thetah1 ' ,
3638 value = 0.2 * np .random .uniform (- 1.0 , 1.0 ,
37- (n_inputs + ( n_hidden * 1 ) , n_hidden ))
39+ (n_hidden , n_hidden ))
3840 .astype (theano .config .floatX ))
3941
40- # thetah, recurrent weights matrix (hidden to hidden)
41- self .thetah = theano .shared (name = 'thetah' ,
42+ self .thetah2 = theano .shared (name = 'thetah2' ,
4243 value = 0.2 * np .random .uniform (- 1.0 , 1.0 ,
4344 (n_hidden , n_hidden ))
4445 .astype (theano .config .floatX ))
@@ -61,11 +62,12 @@ def __init__(self, n_inputs, n_hidden, n_output,
6162
6263 # h0, hidden states
6364 self .h0 = theano .shared (name = 'h0' ,
64- value = np .zeros (n_hidden ,
65+ value = np .zeros (( 2 , n_hidden ) ,
6566 dtype = theano .config .floatX ))
6667
6768 # all the parameters
68- self .params = [self .theta1 , self .theta_tm1 , self .thetah , self .theta2 , self .bh , self .bout , self .h0 ]
69+ self .params = [self .theta1 , self .thetah1 , self .thetah2 ,
70+ self .theta2 , self .bh , self .bout , self .h0 ]
6971
7072 # activation function
7173 self .activation = activation
@@ -75,40 +77,37 @@ def __init__(self, n_inputs, n_hidden, n_output,
7577 y = T .dmatrix ('y' )
7678
7779 # forward pass with recurrence y_t_minus_1 (only 1 time step in context window)
78- def forward_pass (x_tm1 , x_t , h_tm1 ):
80+ def forward_pass (x_t , h_tm2 , h_tm1 ):
7981
80- # hidden states as t-1
81- h_x_tm1 = self .activation (T .dot (x_tm1 , self .theta_tm1 ));
82-
83- # hidden states as t
84- new_x_t = T .concatenate ([x_t , h_x_tm1 ])
85- h_t = self .activation (T .dot (new_x_t , self .theta1 ) +
86- T .dot (h_tm1 , self .thetah ) + self .bh )
82+ h_t = self .activation (T .dot (x_t , self .theta1 ) +
83+ (T .dot (h_tm2 , self .thetah1 ) + self .bh ) +
84+ (T .dot (h_tm1 , self .thetah2 ) + self .bh )
85+ )
8786
8887 # output at t
8988 y_t = T .dot (h_t , self .theta2 ) + self .bout
9089
9190 return h_t , y_t
9291
9392 [h , y_pred ], _ = theano .scan (fn = forward_pass ,
94- sequences = dict ( input = x , taps = [ - 1 , - 0 ]) ,
95- outputs_info = [self .h0 , None ],
96- n_steps = x .shape [0 ]- 1 )
93+ sequences = x ,
94+ outputs_info = [dict ( initial = self .h0 , taps = [ - 2 , - 1 ]) , None ],
95+ n_steps = x .shape [0 ])
9796
9897 # let's use L1 and L2 regularization
9998
10099 # L1 regularization
101100 self .L1 = 0
102- self .L1 += abs (self .thetah .sum ())
103- self .L1 += abs (self .theta_tm1 .sum ())
101+ self .L1 += abs (self .thetah1 .sum ())
102+ self .L1 += abs (self .thetah2 .sum ())
104103 self .L1 += abs (self .theta1 .sum ())
105104 self .L1 += abs (self .theta2 .sum ())
106105
107106 # square of L2 norm ; one regularization option is to enforce
108107 # square of L2 norm to be small
109108 self .L2_sqr = 0
110- self .L2_sqr += (self .thetah ** 2 ).sum ()
111- self .L2_sqr += (self .theta_tm1 ** 2 ).sum ()
109+ self .L2_sqr += (self .thetah1 ** 2 ).sum ()
110+ self .L2_sqr += (self .thetah2 ** 2 ).sum ()
112111 self .L2_sqr += (self .theta1 ** 2 ).sum ()
113112 self .L2_sqr += (self .theta2 ** 2 ).sum ()
114113
@@ -139,9 +138,8 @@ def forward_pass(x_tm1, x_t, h_tm1):
139138
140139
141140 def train (self , x , y , learning_rate ):
142- new_y = y [1 :]
143- self .batch_train (x , new_y , learning_rate )
144- self .batch_train_print (x , new_y , learning_rate )
141+ self .batch_train (x , y , learning_rate )
142+ self .batch_train_print (x , y , learning_rate )
145143
146144 def mse (self , y ):
147145 # error between output and target
@@ -156,14 +154,14 @@ def main(params=None):
156154 'decay' : True ,
157155 'n_hidden' : 50 ,
158156 'seed' : 234 ,
159- 'epochs' : 1
157+ 'epochs' : 2
160158 }
161159
162160 print params
163161
164162 training_data , validation_data , testing_data = p .getdata ()
165163 training_data_x , training_data_y = training_data
166-
164+ testing_data_x , testing_data_y = testing_data
167165 np .random .seed (params ['seed' ])
168166 random .seed (params ['seed' ])
169167
@@ -176,9 +174,9 @@ def main(params=None):
176174 total_cost = 0 ;
177175 for batch_index in xrange (training_data_x .__len__ ()):
178176 rnn .train (training_data_x [batch_index ], training_data_y [batch_index ], params ['lr' ])
179- train_loss = [rnn .compute_training_error (training_data_x [i ], training_data_y [i ][ 1 :] ) for i in xrange (training_data_x .__len__ ())]
177+ train_loss = [rnn .compute_training_error (testing_data_x [i ], testing_data_y [i ]) for i in xrange (testing_data_x .__len__ ())]
180178 total_training_loss = np .mean (train_loss )
181- print 'epoch %i with average cost %f' % (epoch , total_training_loss )
179+ print 'epoch %i with test mse %f' % (epoch , total_training_loss )
182180
# Script entry point: train the RNN with default parameters when the
# module is executed directly (no effect when imported).
if __name__ == '__main__':
    main()
0 commit comments