Changes from 1 commit
Commits (33)
7101abb
Added GPU implementation of DNNs.
Jul 17, 2016
49906d9
Removed space before variable.
Jul 17, 2016
d4b6866
Removed Print() statement on weight matrices after training.
Jul 17, 2016
5690a9c
Removed output.
Jul 17, 2016
578e317
Removed explicit setting of the backend compiler.
Jul 18, 2016
a09f2fa
Added include guards for Cuda architecture header.
Jul 18, 2016
0146c55
Added missing test file.
Jul 18, 2016
f8d2317
Added missing file.
Jul 18, 2016
fb77aa8
Removed profiling switch in TestDerivativesCuda.
Jul 19, 2016
7f559f7
Fixed naming of Cuda kernels.
Jul 19, 2016
66587d0
Some optimizations in the training routine.
Jul 21, 2016
9d162bf
Applied stash.
Jul 21, 2016
ab09b0d
Fixed out of bounds memory access in vertical reduction kernel.
Jul 25, 2016
56d0579
Fixed out of bounds memory access in vertical reduction kernel.
Jul 25, 2016
92be232
Merge branch 'tmva_gpu' of https://github.com/simonpf/root into tmva_gpu
Jul 25, 2016
9eb6a50
Minor cosmetics.
Jul 26, 2016
c6ae8ed
Some more cosmetics.
Jul 26, 2016
ab85e89
Cleaned up output.
Jul 28, 2016
4a5822a
Merge branch 'tmva_gpu' of https://github.com/simonpf/root into tmva_gpu
Jul 28, 2016
6c8abba
Fixed minimization test.
Jul 28, 2016
e594dda
Fixed formatting in CudaMatrix.h
Jul 28, 2016
0c7667f
Enlarged batch size in minimization test.
Jul 28, 2016
f9b95e9
Merge branch 'tmva_gpu' of github.com:simonpf/root into tmva_gpu
Jul 28, 2016
46ac988
Generic data loader.
Aug 9, 2016
8e1edd2
Added TestDataLoaderCuda.cxx.
Aug 9, 2016
dfe0f09
Made copy async.
Aug 9, 2016
b9528e5
Smaller fixes.
Aug 9, 2016
0a3ae51
Added flop counter.
Aug 9, 2016
7cef87b
Merge branch 'tmva_gpu' of github.com:simonpf/root into tmva_gpu
Aug 9, 2016
3cb26ba
Fixed flop rate computation.
Aug 9, 2016
0b23d06
Testing different curand initialization.
Aug 11, 2016
df80c5e
Testing different parallelization scheme.
Aug 11, 2016
dcbf1c6
Minor fixes and modifications.
Aug 13, 2016
Added flop counter.
Simon Pfreundschuh committed Aug 9, 2016
commit 0a3ae5103a3f539ee45e4fb7cfbfcc56aae01551
15 changes: 14 additions & 1 deletion tmva/tmva/inc/TMVA/DNN/Minimizers.h
@@ -12,6 +12,7 @@
 #define TMVA_DNN_MINIMIZERS
 
 #include "DataLoader.h"
+#include <chrono>
 
 namespace TMVA {
 namespace DNN {
@@ -186,8 +187,13 @@ template <typename Data_t, typename Net_t>
       }
    }
 
+   std::chrono::time_point<std::chrono::system_clock> start, end;
+   start = std::chrono::system_clock::now();
+
    while (!converged)
    {
+      fStepCount++;
+
       size_t netIndex = 0;
       for (auto b : trainLoader) {
          // Perform minimization step.
@@ -199,6 +205,13 @@
 
       // Compute test error.
       if ((fStepCount % fTestInterval) == 0) {
+         end = std::chrono::system_clock::now();
+         std::chrono::duration<double> elapsed_seconds = end - start;
+         start = std::chrono::system_clock::now();
+         double seconds = elapsed_seconds.count();
+         std::cout << "Elapsed time for " << fTestInterval << " Epochs: "
+                   << seconds << " [s] => " << net.GetNFlops() * 1e-6 / seconds
+                   << " GFlop/s" << std::endl;
          auto b = *testLoader.begin();
          auto inputMatrix = b.GetInput();
          auto outputMatrix = b.GetOutput();
@@ -207,7 +220,7 @@ template <typename Data_t, typename Net_t>
          std::cout << fStepCount << ": " << loss << std::endl;
          converged = HasConverged();
       }
-      fStepCount++;
+
    }
    return fMinimumError;
 }
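The timing added above follows the usual steady-state throughput pattern: take a time point, run a fixed number of steps, and divide the work performed by the elapsed wall-clock time. Below is a minimal, self-contained sketch of that pattern; `RunTrainingStep` and `kFlopsPerStep` are hypothetical stand-ins for the net's training step and its `GetNFlops()` result, not TMVA API. Note that the work done in one report interval is the per-step flop count times the number of steps; the later commit 3cb26ba ("Fixed flop rate computation") in the list above suggests the conversion printed in this commit was still being adjusted.

```cpp
// Minimal sketch of the interval-timing pattern in the diff above.
// RunTrainingStep and kFlopsPerStep are hypothetical stand-ins.
#include <chrono>
#include <iostream>

static void RunTrainingStep() { /* one gradient-descent step */ }

int main()
{
   const int    testInterval  = 10;     // Steps between throughput reports.
   const double kFlopsPerStep = 1.0e9;  // Assumed flop count of one step.

   auto start = std::chrono::system_clock::now();
   for (int step = 1; step <= 100; step++) {
      RunTrainingStep();
      if ((step % testInterval) == 0) {
         auto end = std::chrono::system_clock::now();
         std::chrono::duration<double> elapsed = end - start;
         double seconds = elapsed.count();
         // Work in the interval = flops per step * number of steps;
         // the 1e-9 factor converts flop/s to GFlop/s.
         double gflops = kFlopsPerStep * testInterval / seconds * 1e-9;
         std::cout << step << ": " << seconds << " s => "
                   << gflops << " GFlop/s" << std::endl;
         start = std::chrono::system_clock::now();
      }
   }
   return 0;
}
```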
32 changes: 32 additions & 0 deletions tmva/tmva/inc/TMVA/DNN/Net.h
@@ -125,6 +125,8 @@ template<typename Architecture_t, typename Layer_t = TLayer<Architecture_t>>
     * function f to the activation of the last layer in the network. */
    inline void Prediction(Matrix_t &Y_hat, EOutputFunction f) const;
 
+   Scalar_t GetNFlops();
+
    size_t GetDepth() const     {return fLayers.size();}
    size_t GetBatchSize() const {return fBatchSize;}
    Layer_t & GetLayer(size_t i) {return fLayers[i];}
@@ -309,7 +311,37 @@ template<typename Architecture_t, typename Layer_t>
 }
 
 //______________________________________________________________________________
+template<typename Architecture_t, typename Layer_t>
+auto TNet<Architecture_t, Layer_t>::GetNFlops()
+   -> Scalar_t
+{
+   Scalar_t flops = 0;
+
+   Scalar_t nb  = (Scalar_t) fBatchSize;
+   Scalar_t nlp = (Scalar_t) fInputWidth;
+
+   for(size_t i = 0; i < fLayers.size(); i++) {
+      Layer_t & layer = fLayers[i];
+      Scalar_t nl = (Scalar_t) layer.GetWidth();
+
+      // Forward propagation.
+      flops += nb * nl * (2.0 * nlp - 1); // Matrix mult.
+      flops += nb * nl;                   // Add bias values.
+      flops += 2 * nb * nl;               // Apply activation function and compute
+                                          // derivative.
+      // Backward propagation.
+      flops += nb * nl;                      // Hadamard
+      flops += nlp * nb * (2.0 * nlp - 1.0); // Weight gradients
+      flops += nl * (nb - 1);                // Bias gradients
+      if (i > 0) {
+         flops += nlp * nb * (2.0 * nl - 1.0); // Previous layer gradients.
+      }
+      nlp = nl;
+   }
+   return flops;
+}
+
 //______________________________________________________________________________
 template<typename Architecture_t, typename Layer_t>
 void TNet<Architecture_t, Layer_t>::Print()
 {
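All the counts in `GetNFlops` come down to the standard rule that multiplying an m x k matrix by a k x n matrix costs m*n*(2k - 1) floating-point operations: each of the m*n output entries takes k multiplications and k - 1 additions. Below is a minimal sketch of the forward-pass portion of that accounting; `MatMulFlops` is an illustrative helper with made-up layer widths, not part of TMVA.

```cpp
#include <cstdio>
#include <vector>

// Flop count for multiplying an (m x k) matrix by a (k x n) matrix:
// each of the m*n output entries takes k multiplications and k-1 additions.
static double MatMulFlops(double m, double k, double n)
{
   return m * n * (2.0 * k - 1.0);
}

int main()
{
   // Hypothetical network: batch size 32, input width 20, two layers.
   double nb  = 32.0;
   double nlp = 20.0;                       // Width of the previous layer.
   std::vector<double> widths = {50.0, 10.0};

   double flops = 0.0;
   for (double nl : widths) {
      flops += MatMulFlops(nb, nlp, nl);    // Linear transform: (nb x nlp) * (nlp x nl).
      flops += nb * nl;                     // Add bias values.
      flops += 2.0 * nb * nl;               // Activation function and its derivative.
      nlp = nl;
   }
   std::printf("Forward pass: %.0f flops per batch\n", flops);
   return 0;
}
```

By the same rule, the weight-gradient product of an (nlp x nb) transposed input with an (nb x nl) gradient matrix would cost nlp*nl*(2*nb - 1) flops rather than the nlp*nb*(2*nlp - 1) counted in the diff; whether the later "Fixed flop rate computation" commit (3cb26ba) revised this term or only the printed rate is not visible here.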