1111class RNN (object ):
1212
1313 def __init__ (self , n_inputs , n_hidden , n_output ,
14- activation = T .tanh , L1_reg = 0.0001 , L2_reg = 0.0001 ,):
14+ activation = T .tanh , L1_reg = 0.0001 , L2_reg = 0.0001 ,
15+ window = 2 ):
1516
1617 '''
1718
@@ -24,21 +25,21 @@ def __init__(self, n_inputs, n_hidden, n_output,
2425 # parameters
2526 self .L1_reg = L1_reg
2627 self .L2_reg = L2_reg
28+ self .wSize = window
2729
28- # theta_tm1 , weight matrix from input to hidden units
29- self .theta_tm1 = theano .shared (name = 'theta_tm1 ' ,
30+ # theta1 , weight matrix from input to hidden units
31+ self .theta1 = theano .shared (name = 'theta1 ' ,
3032 value = 0.2 * np .random .uniform (- 1.0 , 1.0 ,
3133 (n_inputs , n_hidden ))
3234 .astype (theano .config .floatX ))
3335
34- # theta1, weight matrix from input to hidden units
35- self .theta1 = theano .shared (name = 'theta1 ' ,
36+ # thetah, recurrent weights matrix (hidden to hidden)
37+ self .thetah1 = theano .shared (name = 'thetah1 ' ,
3638 value = 0.2 * np .random .uniform (- 1.0 , 1.0 ,
37- (n_inputs + ( n_hidden * 1 ) , n_hidden ))
39+ (n_hidden , n_hidden ))
3840 .astype (theano .config .floatX ))
3941
40- # thetah, recurrent weights matrix (hidden to hidden)
41- self .thetah = theano .shared (name = 'thetah' ,
42+ self .thetah2 = theano .shared (name = 'thetah2' ,
4243 value = 0.2 * np .random .uniform (- 1.0 , 1.0 ,
4344 (n_hidden , n_hidden ))
4445 .astype (theano .config .floatX ))
@@ -61,11 +62,12 @@ def __init__(self, n_inputs, n_hidden, n_output,
6162
6263 # h0, hidden states
6364 self .h0 = theano .shared (name = 'h0' ,
64- value = np .zeros (n_hidden ,
65+ value = np .zeros (( 2 , n_hidden ) ,
6566 dtype = theano .config .floatX ))
6667
6768 # all the parameters
68- self .params = [self .theta1 , self .theta_tm1 , self .thetah , self .theta2 , self .bh , self .bout , self .h0 ]
69+ self .params = [self .theta1 , self .thetah1 , self .thetah2 ,
70+ self .theta2 , self .bh , self .bout , self .h0 ]
6971
7072 # activation function
7173 self .activation = activation
@@ -75,40 +77,37 @@ def __init__(self, n_inputs, n_hidden, n_output,
7577 y = T .dmatrix ('y' )
7678
7779 # forward pass with recurrence y_t_minus_1 (only 1 time step in context window)
78- def forward_pass (x_tm1 , x_t , h_tm1 ):
80+ def forward_pass (x_t , h_tm2 , h_tm1 ):
7981
80- # hidden states as t-1
81- h_x_tm1 = self .activation (T .dot (x_tm1 , self .theta_tm1 ));
82-
83- # hidden states as t
84- new_x_t = T .concatenate ([x_t , h_x_tm1 ])
85- h_t = self .activation (T .dot (new_x_t , self .theta1 ) +
86- T .dot (h_tm1 , self .thetah ) + self .bh )
82+ h_t = self .activation (T .dot (x_t , self .theta1 ) +
83+ (T .dot (h_tm2 , self .thetah1 ) + self .bh ) +
84+ (T .dot (h_tm1 , self .thetah2 ) + self .bh )
85+ )
8786
8887 # output at t
8988 y_t = T .dot (h_t , self .theta2 ) + self .bout
9089
9190 return h_t , y_t
9291
9392 [h , y_pred ], _ = theano .scan (fn = forward_pass ,
94- sequences = dict ( input = x , taps = [ - 1 , - 0 ]) ,
95- outputs_info = [self .h0 , None ],
96- n_steps = x .shape [0 ]- 1 )
93+ sequences = x ,
94+ outputs_info = [dict ( initial = self .h0 , taps = [ - 2 , - 1 ]) , None ],
95+ n_steps = x .shape [0 ])
9796
9897 # let's use L1 and L2 regularization
9998
10099 # L1 regularization
101100 self .L1 = 0
102- self .L1 += abs (self .thetah .sum ())
103- self .L1 += abs (self .theta_tm1 .sum ())
101+ self .L1 += abs (self .thetah1 .sum ())
102+ self .L1 += abs (self .thetah2 .sum ())
104103 self .L1 += abs (self .theta1 .sum ())
105104 self .L1 += abs (self .theta2 .sum ())
106105
107106 # square of L2 norm ; one regularization option is to enforce
108107 # square of L2 norm to be small
109108 self .L2_sqr = 0
110- self .L2_sqr += (self .thetah ** 2 ).sum ()
111- self .L2_sqr += (self .theta_tm1 ** 2 ).sum ()
109+ self .L2_sqr += (self .thetah1 ** 2 ).sum ()
110+ self .L2_sqr += (self .thetah2 ** 2 ).sum ()
112111 self .L2_sqr += (self .theta1 ** 2 ).sum ()
113112 self .L2_sqr += (self .theta2 ** 2 ).sum ()
114113
@@ -139,9 +138,8 @@ def forward_pass(x_tm1, x_t, h_tm1):
139138
140139
141140 def train (self , x , y , learning_rate ):
142- new_y = y [1 :]
143- self .batch_train (x , new_y , learning_rate )
144- self .batch_train_print (x , new_y , learning_rate )
141+ self .batch_train (x , y , learning_rate )
142+ self .batch_train_print (x , y , learning_rate )
145143
146144 def mse (self , y ):
147145 # error between output and target
@@ -156,14 +154,14 @@ def main(params=None):
156154 'decay' : True ,
157155 'n_hidden' : 50 ,
158156 'seed' : 234 ,
159- 'epochs' : 1
157+ 'epochs' : 2
160158 }
161159
162160 print params
163161
164162 training_data , validation_data , testing_data = p .getdata ()
165163 training_data_x , training_data_y = training_data
166-
164+ testing_data_x , testing_data_y = testing_data
167165 np .random .seed (params ['seed' ])
168166 random .seed (params ['seed' ])
169167
@@ -176,9 +174,9 @@ def main(params=None):
176174 total_cost = 0 ;
177175 for batch_index in xrange (training_data_x .__len__ ()):
178176 rnn .train (training_data_x [batch_index ], training_data_y [batch_index ], params ['lr' ])
179- train_loss = [rnn .compute_training_error (training_data_x [i ], training_data_y [i ][ 1 :] ) for i in xrange (training_data_x .__len__ ())]
177+ train_loss = [rnn .compute_training_error (testing_data_x [i ], testing_data_y [i ]) for i in xrange (testing_data_x .__len__ ())]
180178 total_training_loss = np .mean (train_loss )
181- print 'epoch %i with average cost %f' % (epoch , total_training_loss )
179+ print 'epoch %i with test mse %f' % (epoch , total_training_loss )
182180
# Script entry point: train the RNN with default parameters when the
# module is executed directly (no effect when imported).
if __name__ == '__main__':
    main()
0 commit comments