Closed

Commits (33)
7101abb (Jul 17, 2016): Added GPU implementation of DNNs.
49906d9 (Jul 17, 2016): Removed space before variable.
d4b6866 (Jul 17, 2016): Removed Print() statement on weight matrices after training.
5690a9c (Jul 17, 2016): Removed output.
578e317 (Jul 18, 2016): Removed explicit setting of the backend compiler.
a09f2fa (Jul 18, 2016): Added include guards for Cuda architecture header.
0146c55 (Jul 18, 2016): Added missing test file.
f8d2317 (Jul 18, 2016): Added missing file.
fb77aa8 (Jul 19, 2016): Removed profiling switch in TestDerivativesCuda.
7f559f7 (Jul 19, 2016): Fixed naming of Cuda kernels.
66587d0 (Jul 21, 2016): Some optimizations in the training routine.
9d162bf (Jul 21, 2016): Applied stash.
ab09b0d (Jul 25, 2016): Fixed out of bounds memory access in vertical reduction kernel.
56d0579 (Jul 25, 2016): Fixed out of bounds memory access in vertical reduction kernel.
92be232 (Jul 25, 2016): Merge branch 'tmva_gpu' of https://github.com/simonpf/root into tmva_gpu
9eb6a50 (Jul 26, 2016): Minor cosmetics.
c6ae8ed (Jul 26, 2016): Some more cosmetics.
ab85e89 (Jul 28, 2016): Cleaned up output.
4a5822a (Jul 28, 2016): Merge branch 'tmva_gpu' of https://github.com/simonpf/root into tmva_gpu
6c8abba (Jul 28, 2016): Fixed minimization test.
e594dda (Jul 28, 2016): Fixed formatting in CudaMatrix.h
0c7667f (Jul 28, 2016): Enlarged batch size in minimization test.
f9b95e9 (Jul 28, 2016): Merge branch 'tmva_gpu' of github.com:simonpf/root into tmva_gpu
46ac988 (Aug 9, 2016): Generic data loader.
8e1edd2 (Aug 9, 2016): Added TestDataLoaderCuda.cxx.
dfe0f09 (Aug 9, 2016): Made copy async.
b9528e5 (Aug 9, 2016): Smaller fixes.
0a3ae51 (Aug 9, 2016): Added flop counter.
7cef87b (Aug 9, 2016): Merge branch 'tmva_gpu' of github.com:simonpf/root into tmva_gpu
3cb26ba (Aug 9, 2016): Fixed flop rate computation.
0b23d06 (Aug 11, 2016): Testing different curand initialization.
df80c5e (Aug 11, 2016): Testing different parallelization scheme.
dcbf1c6 (Aug 13, 2016): Minor fixes and modifications.
2 changes: 1 addition & 1 deletion tmva/tmva/inc/TMVA/DNN/Layer.h

@@ -282,7 +282,7 @@ template<typename Architecture_t>
 {
    std::cout << "Width: " << fWeights.GetNrows();
    std::cout << ", activation function: ";
-   std::cout << static_cast<char>(fF) << std::endl;
+   std::cout << static_cast<int>(fF) << std::endl;
 }

 //______________________________________________________________________________
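An aside on the cast change above (an illustration, not part of the PR): fF is an activation-function enumerator, so streaming it as a char writes the raw byte of its numeric value, which for small values is an unprintable control character, whereas an int cast prints the number itself. A minimal sketch with a stand-in enum, since the real EActivationFunction definition is not shown in this diff:

#include <iostream>

// Stand-in for TMVA's activation-function enum; the enumerator names match
// those used elsewhere in this PR, but the underlying type and values are
// illustrative only.
enum class EActivationFunction : char { IDENTITY = 0, RELU = 1, TANH = 2 };

int main()
{
   EActivationFunction fF = EActivationFunction::TANH;
   std::cout << static_cast<char>(fF) << std::endl; // writes byte 0x02, not readable
   std::cout << static_cast<int>(fF)  << std::endl; // prints "2"
}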
15 changes: 7 additions & 8 deletions tmva/tmva/inc/TMVA/DNN/Minimizers.h

@@ -87,9 +87,9 @@ class TGradientDescent
       rate \f$\alpha\f$ and subtracted from the weights and bias values of each
       layer. */
    template <typename Net_t>
-   Scalar_t Step(Net_t &net,
-                 Matrix_t &input,
-                 const Matrix_t &output);
+   void Step(Net_t &net,
+             Matrix_t &input,
+             const Matrix_t &output);
    /** Does not evaluate the loss and therefore does not trigger a possible synchronization
     * with the device. Trains the weights of each layer, but only the bias terms of
     * the first layer for compatibility with the previous implementation. */

@@ -197,13 +197,13 @@ template <typename Data_t, typename Net_t>
 //______________________________________________________________________________
 template<typename Architecture_t>
 template <typename Net_t>
-auto inline TGradientDescent<Architecture_t>::Step(Net_t & net,
+void inline TGradientDescent<Architecture_t>::Step(Net_t & net,
                                                     Matrix_t &input,
                                                     const Matrix_t &output)
--> Scalar_t
 {
-   Scalar_t loss = net.Loss(input, output);
-   fTrainingError = loss;
+   //Scalar_t loss = net.Loss(input, output);
+   //fTrainingError = loss;
+   net.Forward(input);
    net.Backward(input, output);

    for (size_t i = 0; i < net.GetDepth(); i++)

@@ -216,7 +216,6 @@ template<typename Architecture_t>
                             layer.GetBiasGradients(),
                             -fLearningRate);
    }
-   return loss;
 }

 //______________________________________________________________________________
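Since Step no longer computes or returns the loss, a caller that wants to monitor the training error has to evaluate it explicitly and accept the device synchronization that this implies. A rough usage sketch under that reading of the diff; the variables net, input, output, nSteps and reportEvery are placeholders, and only Step, Loss and the TGradientDescent constructor arguments are taken from this PR:

// Sketch only, not code from the PR.
TGradientDescent<TCuda> minimizer(0.001, 20, 20);
for (size_t step = 1; step <= nSteps; ++step) {
   // Forward pass, backward pass and weight updates only; no loss value is
   // returned, so nothing has to be read back from the device here.
   minimizer.Step(net, input, output);

   if (step % reportEvery == 0) {
      // Explicit loss evaluation when the training error is actually needed;
      // this is where the synchronization that Step() now avoids can occur.
      std::cout << "step " << step << ": loss = "
                << net.Loss(input, output) << std::endl;
   }
}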
3 changes: 3 additions & 0 deletions tmva/tmva/src/MethodDNN.cxx

@@ -687,6 +687,9 @@ void TMVA::MethodDNN::TrainGPU()
       case DNN::EnumFunction::RELU :
         GPUNet.AddLayer((*itLayout).first, EActivationFunction::RELU);
         break;
+      case DNN::EnumFunction::TANH :
+        GPUNet.AddLayer((*itLayout).first, EActivationFunction::TANH);
+        break;
       case DNN::EnumFunction::SYMMRELU :
         GPUNet.AddLayer((*itLayout).first, EActivationFunction::SYMMRELU);
         break;
24 changes: 10 additions & 14 deletions tmva/tmva/test/DNN/TestMinimizationCuda.cxx

@@ -27,17 +27,10 @@ int main()
    using Matrix_t = TMatrixT<Double_t>;
    using Net_t = TNet<TCuda>;

-   Matrix_t XTrain(4000,20), YTrain(4000,20), XTest(20,20), YTest(20,20), W(20, 20);
+   Matrix_t XTrain(100000,20), YTrain(100000,20), XTest(20,20), YTest(20,20), W(20, 20);

    randomMatrix(W);

-   for (size_t i = 0; i < 4000; i++) {
-      for (size_t j = 0; j < 20; j++) {
-         XTrain(i,j) = i;
-         YTrain(i,j) = i;
-      }
-   }
-
    randomMatrix(XTrain);
    randomMatrix(XTest);

@@ -47,23 +40,26 @@ int main()
    MatrixInput_t trainData(XTrain, YTrain);
    MatrixInput_t testData(XTest, YTest);

-   Net_t net(20, 20, ELossFunction::MEANSQUAREDERROR);
-   net.AddLayer(100, EActivationFunction::IDENTITY);
-   net.AddLayer(100, EActivationFunction::IDENTITY);
+   Net_t net(1000, 20, ELossFunction::MEANSQUAREDERROR);
+
+   net.AddLayer(200, EActivationFunction::IDENTITY);
+   net.AddLayer(200, EActivationFunction::IDENTITY);
+   net.AddLayer(200, EActivationFunction::IDENTITY);
    net.AddLayer(20, EActivationFunction::IDENTITY);
    net.Initialize(EInitialization::GAUSS);
+   auto testnet = net.CreateClone(20);

    TGradientDescent<TCuda> minimizer(0.001, 20, 20);
-   minimizer.Train(trainData, 4000, testData, 20, net);
+   minimizer.Train(trainData, 100000, testData, 20, net);

    TMatrixT<Double_t> I(20,20); identityMatrix(I);
    TCudaMatrix ICuda(I);

-   net.Forward(ICuda);
+   testnet.Forward(ICuda);

    TMatrixT<Double_t> WT(20, 20);
    WT.Transpose(W);

-   auto error = maximumRelativeError((TMatrixT<Double_t>) net.GetOutput(), WT);
+   auto error = maximumRelativeError((TMatrixT<Double_t>) testnet.GetOutput(), WT);
    std::cout << "Maximum relative error: " << error << std::endl;
 }
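A reading of the testnet change (an inference, not something stated in the diff): the net is now constructed with a batch size of 1000, so the 20-row identity probe can no longer be forwarded through it directly; net.CreateClone(20) appears to create a second net with batch size 20 over the same trained weights, and it is that clone whose output is compared against the transposed weight matrix.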