From 818afb9b7a87bf1a8624b1c50e51d20a73e9ee14 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Fri, 24 Jun 2016 04:36:03 +0530 Subject: [PATCH 01/42] included xxhash.h --- tmva/tmva/inc/TMVA/NeuralNet.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index 3ffaca7df35bb..346a6b717dc12 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -52,6 +52,7 @@ #include // turn on or off exceptions for NaN and other numeric exceptions +#include "xxhash.h" namespace TMVA { From db6990faaa32d598592d396d31053c5a23c55448 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Fri, 24 Jun 2016 07:09:05 +0530 Subject: [PATCH 02/42] added hashing initializations --- tmva/tmva/inc/TMVA/NeuralNet.icc | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index 3ef36f1cf6eb5..a1dec10d37692 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -8,6 +8,8 @@ #include "Math/Util.h" +#include "xxhash.h" + namespace TMVA { @@ -16,8 +18,16 @@ namespace TMVA - - +/*! \brief xxHash initializations + * + * + */ + XXH32_state_t state; + int seed = 0x9747b28c; // Used to initialize the hash value, use whatever + // value you want, but always the same. + XXH32_reset(&state, seed); + + int BUCKET_SIZE = 8; @@ -1415,7 +1425,8 @@ namespace TMVA if (eInitStrategy == WeightInitializationStrategy::XAVIER) { // input and output properties - int numInput = inputSize (); + //int numInput = inputSize (); + int numInput = BUCKET_SIZE; // compute variance and mean of input and output //... From efa2efb43b29fb8d5178ee724278a23fd0a23d93 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Fri, 24 Jun 2016 13:13:49 +0530 Subject: [PATCH 03/42] added weightBucket This is just changing the 'weights' vector to a compressed 'weightBucket' vector of size 'BUCKET_SIZE' per layer. Will have restructure other parts of the code for this to make algorithmic sense. --- tmva/tmva/inc/TMVA/NeuralNet.icc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index a1dec10d37692..ba2052b32c899 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -758,7 +758,7 @@ namespace TMVA * \param settings the settings for the training (e.g. multithreading or not, regularization etc.) 
*/ template - double Net::train (std::vector& weights, + double Net::train (std::vector& weightBucket, std::vector& trainPattern, const std::vector& testPattern, Minimizer& minimizer, Settings& settings) @@ -822,7 +822,7 @@ namespace TMVA { if (isWeightsForDrop) { - dropOutWeightFactor (weights, dropFractions); + dropOutWeightFactor (weightBucket, dropFractions); isWeightsForDrop = false; } @@ -885,10 +885,10 @@ namespace TMVA Batch batch (it, it+1); output.clear (); std::tuple passThrough (settings, batch, dropContainerTest); - double testPatternError = (*this) (passThrough, weights, ModeOutput::FETCH, output); + double testPatternError = (*this) (passThrough, weightBucket, ModeOutput::FETCH, output); if (output.size () == 1) { - /* std::vector out = (*this).compute (p.input (), weights); */ + /* std::vector out = (*this).compute (p.input (), weightBucket); */ /* assert (output.at (0) == out.at (0)); */ settings.testSample (testPatternError, output.at (0), p.output ().at (0), weight); } @@ -901,12 +901,12 @@ namespace TMVA settings.endTestCycle (); // testError /= weightSum; - settings.computeResult (*this, weights); + settings.computeResult (*this, weightBucket); hasConverged = settings.hasConverged (testError); if (!hasConverged && !isWeightsForDrop) { - dropOutWeightFactor (weights, dropFractions, true); // inverse + dropOutWeightFactor (weightBucket, dropFractions, true); // inverse isWeightsForDrop = true; } } @@ -963,7 +963,7 @@ namespace TMVA * \param dropContainer the data for dropping-out nodes (regularization technique) */ template - inline double Net::trainCycle (Minimizer& minimizer, std::vector& weights, + inline double Net::trainCycle (Minimizer& minimizer, std::vector& weightBucket, Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer) { double error = 0.0; @@ -1024,7 +1024,7 @@ namespace TMVA { Batch& batch = *it; std::tuple settingsAndBatch (settings, batch, dropContainer); - localError += minimizer ((*this), weights, settingsAndBatch); /// call the minimizer + localError += minimizer ((*this), weightBucket, settingsAndBatch); /// call the minimizer } return localError; }) @@ -1039,7 +1039,7 @@ namespace TMVA for (auto& batch : batches) { std::tuple settingsAndBatch (settings, batch, dropContainer); - error += minimizer ((*this), weights, settingsAndBatch); + error += minimizer ((*this), weightBucket, settingsAndBatch); } } From c821a57c5abdc26f36866a7fff748253cc53bf78 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Wed, 29 Jun 2016 01:47:17 +0530 Subject: [PATCH 04/42] edited applyWeights() without drop-out as an example --- tmva/tmva/inc/TMVA/NeuralNet.icc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index ba2052b32c899..46969736aff5f 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -115,15 +115,15 @@ namespace TMVA */ template void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, - ItWeight itWeight, + int weightIndex, ItTarget itTargetBegin, ItTarget itTargetEnd) { for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource) { for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget) { - (*itTarget) += (*itSource) * (*itWeight); - ++itWeight; + (*itTarget) += (*itSource) * (weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]); + ++weightIndex; } } } From b6328a79373375b729d768025bce9fcc443bee9a Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Fri, 1 
Jul 2016 13:24:52 +0530 Subject: [PATCH 05/42] Updated all applyWeights() with HashedNets --- tmva/tmva/inc/TMVA/NeuralNet.icc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index 46969736aff5f..7c509434239fe 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -91,7 +91,7 @@ namespace TMVA */ template void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, - ItWeight itWeight, + int weightIndex, ItTarget itTargetBegin, ItTarget itTargetEnd, ItDrop itDrop) { @@ -100,8 +100,8 @@ namespace TMVA for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget) { if (*itDrop) - (*itTarget) += (*itSource) * (*itWeight); - ++itWeight; + (*itTarget) += (*itSource) * (weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]); + ++weightIndex; } ++itDrop; } @@ -137,15 +137,15 @@ namespace TMVA */ template void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, - ItWeight itWeight, + int weightIndex, ItPrev itPrevBegin, ItPrev itPrevEnd) { for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev) { for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr) { - (*itPrev) += (*itCurr) * (*itWeight); - ++itWeight; + (*itPrev) += (*itCurr) * (weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]); + ++weightIndex; } } } @@ -158,7 +158,7 @@ namespace TMVA */ template void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, - ItWeight itWeight, + int weightIndex, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop) { @@ -167,8 +167,8 @@ namespace TMVA for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr) { if (*itDrop) - (*itPrev) += (*itCurr) * (*itWeight); - ++itWeight; + (*itPrev) += (*itCurr) * (weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]); + ++weightIndex; } ++itDrop; } From a58ac1da96af01f862bd29b6b78736a610743aaf Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Fri, 1 Jul 2016 13:45:03 +0530 Subject: [PATCH 06/42] Update NeuralNet.h --- tmva/tmva/inc/TMVA/NeuralNet.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index 346a6b717dc12..5786f9137e2ff 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -240,12 +240,12 @@ namespace TMVA template - void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd); + void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, int weightIndex, std::vector& weightBucket, ItTarget itTargetBegin, ItTarget itTargetEnd); template - void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd); + void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int weightIndex, std::vector& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd); @@ -273,7 +273,7 @@ namespace TMVA ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient, - ItWeight itWeight, double weightDecay); + int weightIndex, std::vector& weightBucket, double weightDecay); From c81f2482677b6fbe469f1a9037f5fff14e89855a Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Fri, 1 Jul 2016 14:15:27 +0530 Subject: [PATCH 07/42] updated update() and Steepest Gradient Descent --- tmva/tmva/inc/TMVA/NeuralNet.icc | 37 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 18 
deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index 7c509434239fe..a4bf76423dfc4 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -91,7 +91,7 @@ namespace TMVA */ template void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, - int weightIndex, + int weightIndex, std::vector& weightBucket, ItTarget itTargetBegin, ItTarget itTargetEnd, ItDrop itDrop) { @@ -115,7 +115,7 @@ namespace TMVA */ template void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, - int weightIndex, + int weightIndex, std::vector& weightBucket, ItTarget itTargetBegin, ItTarget itTargetEnd) { for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource) @@ -137,7 +137,7 @@ namespace TMVA */ template void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, - int weightIndex, + int weightIndex, std::vector& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd) { for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev) @@ -158,7 +158,7 @@ namespace TMVA */ template void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, - int weightIndex, + int weightIndex, std::vector& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop) { @@ -278,7 +278,7 @@ namespace TMVA ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient, - ItWeight itWeight, double weightDecay) + int weightIndex, std::vector& weightBucket, double weightDecay) { // ! the factor weightDecay has to be already scaled by 1/n where n is the number of weights while (itSource != itSourceEnd) @@ -287,8 +287,8 @@ namespace TMVA auto itTargetGradient = itTargetGradientBegin; while (itTargetDelta != itTargetDeltaEnd) { - (*itGradient) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization(*itWeight,weightDecay); - ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight; + (*itGradient) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization(weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE],weightDecay); + ++itTargetDelta; ++itTargetGradient; ++itGradient; ++weightIndex; } ++itSource; } @@ -308,17 +308,17 @@ namespace TMVA * Can be used with multithreading (i.e. "HogWild!" 
style); see call in trainCycle */ template - double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough) + double Steepest::operator() (Function& fitnessFunction, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, PassThrough& passThrough) { - size_t numWeights = weights.size (); - std::vector gradients (numWeights, 0.0); - std::vector localWeights (begin (weights), end (weights)); + size_t numWeights = nextLayerWeightIndex - currLayerWeightIndex + 1; + std::vector gradients (numWeights, 0.0); + std::vector localWeightBucket (begin (weightBucket), end (weightBucket)); double E = 1e10; if (m_prevGradients.size () != numWeights) { m_prevGradients.clear (); - m_prevGradients.assign (weights.size (), 0); + m_prevGradients.assign (nextLayerWeightIndex - currLayerWeightIndex + 1, 0); } bool success = true; @@ -334,11 +334,12 @@ namespace TMVA // apply momentum before computing the new gradient auto itPrevG = begin (m_prevGradients); auto itPrevGEnd = end (m_prevGradients); - auto itLocWeight = begin (localWeights); + int locWeightIndex = currLayerWeightIndex; for (; itPrevG != itPrevGEnd; ++itPrevG) { (*itPrevG) *= m_beta; - (*itLocWeight) += (*itPrevG); + (localWeightBucket[XXH32(seed, locWeightIndex) % BUCKET_SIZE]) += (*itPrevG); + ++locWeightIndex; } E = fitnessFunction (passThrough, localWeights, gradients); @@ -378,11 +379,11 @@ namespace TMVA } else { - auto itW = std::begin (weights); - std::for_each (std::begin (gradients), std::end (gradients), [&itW](double& g) + int weightIndex = currLayerWeightIndex; + std::for_each (std::begin (gradients), std::end (gradients), [&weightIndex](double& g) { - *itW += g; - ++itW; + *(weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]) += g; + ++weightIndex; }); } From 96c8f4214adf211c6b456d9320b4e0a1cab71706 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Fri, 1 Jul 2016 14:18:37 +0530 Subject: [PATCH 08/42] Updated SGD operator() with HashedNets --- tmva/tmva/inc/TMVA/NeuralNet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index 5786f9137e2ff..6531c956fe0c7 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -348,7 +348,7 @@ namespace TMVA * called */ template - double operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough); + double operator() (Function& fitnessFunction, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, PassThrough& passThrough); double m_alpha; ///< internal parameter (learningRate) From 0250775185e30f697662c1f3d2362682dca62d0f Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Fri, 1 Jul 2016 14:37:44 +0530 Subject: [PATCH 09/42] updated weightDecay() --- tmva/tmva/inc/TMVA/NeuralNet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index 6531c956fe0c7..7ce9fd9554b46 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -392,7 +392,7 @@ namespace TMVA template - double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization); + double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization); From 7c62f3b0d66ce1d3964632d37b8066b7975f8fda Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Fri, 1 Jul 
2016 14:38:10 +0530 Subject: [PATCH 10/42] updated weightDecay() with HashedNets --- tmva/tmva/inc/TMVA/NeuralNet.icc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index a4bf76423dfc4..ee8b41c73ecf0 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -382,7 +382,7 @@ namespace TMVA int weightIndex = currLayerWeightIndex; std::for_each (std::begin (gradients), std::end (gradients), [&weightIndex](double& g) { - *(weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]) += g; + weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE] += g; ++weightIndex; }); } @@ -530,16 +530,17 @@ namespace TMVA * */ template - double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization) + double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization) { if (eRegularization == EnumRegularization::L1) { // weight decay (regularization) double w = 0; size_t n = 0; - for (; itWeight != itWeightEnd; ++itWeight, ++n) + int weightIndex; + for (weightIndex = currLayerWeightIndex; weightIndex != nextLayerWeightIndex; ++weightIndex, ++n) { - double weight = (*itWeight); + double weight = (weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]); w += std::fabs (weight); } return error + 0.5 * w * factorWeightDecay / n; @@ -549,9 +550,9 @@ namespace TMVA // weight decay (regularization) double w = 0; size_t n = 0; - for (; itWeight != itWeightEnd; ++itWeight, ++n) + for (weightIndex = currLayerWeightIndex; weightIndex != nextLayerWeightIndex; ++weightIndex, ++n) { - double weight = (*itWeight); + double weight = (weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]); w += weight*weight; } return error + 0.5 * w * factorWeightDecay / n; From fe93bb26a6e6c740d96386ae1794840617429a54 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Wed, 13 Jul 2016 18:44:28 +0530 Subject: [PATCH 11/42] Edited dropOutWeightFactor() --- tmva/tmva/inc/TMVA/NeuralNet.icc | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index ee8b41c73ecf0..6e3b889a1973d 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -700,15 +700,14 @@ namespace TMVA * this function computes the factor and applies it to the weights */ template - void Net::dropOutWeightFactor (WeightsType& weights, + void Net::dropOutWeightFactor (int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, const DropProbabilities& drops, bool inverse) { - if (drops.empty () || weights.empty ()) + if (drops.empty () || weights.empty ()) //Edit this later. 
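// [Editorial sketch, not part of the committed diff] The weightDecay() hunk above keeps
// the regularisation formula itself unchanged; only the weight lookup is rerouted through
// the shared hash bucket. A reduced, self-contained illustration of the L2 branch, assuming
// a std::hash<int> based lookup (the convention the later commits in this series switch to)
// and the global BUCKET_SIZE; all names here are illustrative:
//
//   #include <cstddef>
//   #include <functional>
//   #include <vector>
//
//   static const int BUCKET_SIZE = 8;
//   static std::hash<int> hashIndex;
//
//   double l2WeightDecay (double error, int firstWeightIndex, int lastWeightIndex,
//                         const std::vector<double>& weightBucket, double factorWeightDecay)
//   {
//      double w = 0.0;
//      std::size_t n = 0;
//      for (int i = firstWeightIndex; i != lastWeightIndex; ++i, ++n)
//      {
//         double weight = weightBucket[hashIndex (i) % BUCKET_SIZE]; // shared-slot lookup
//         w += weight * weight;
//      }
//      return error + 0.5 * w * factorWeightDecay / n;
//   }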
***************** return; - auto itWeight = std::begin (weights); - auto itWeightEnd = std::end (weights); + int weightIndex = currLayerWeightIndex; auto itDrop = std::begin (drops); auto itDropEnd = std::end (drops); size_t numNodesPrev = inputSize (); @@ -734,11 +733,11 @@ namespace TMVA size_t _numWeights = layer.numWeights (numNodesPrev); for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight) { - if (itWeight == itWeightEnd) + if (weightIndex == nextLayerWeightIndex) break; - *itWeight *= p; - ++itWeight; + *(weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]) *= p; + ++weightIndex; } numNodesPrev = numNodes; dropFractionPrev = dropFraction; From 13d151a95b5b9058522c9cb4abe70ba1d3658ecc Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Wed, 13 Jul 2016 18:46:31 +0530 Subject: [PATCH 12/42] Edited dropOutWeightFactor() --- tmva/tmva/inc/TMVA/NeuralNet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index 7ce9fd9554b46..0660c6b7911cd 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -1094,7 +1094,7 @@ namespace TMVA * */ template - void dropOutWeightFactor (WeightsType& weights, + void dropOutWeightFactor (int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucke, const DropProbabilities& drops, bool inverse = false); From 725bba3c7c7c119be27fdde8628563edadd43d32 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Mon, 18 Jul 2016 18:35:17 +0530 Subject: [PATCH 13/42] Updated with HashedNets --- tmva/tmva/inc/TMVA/NeuralNet.icc | 103 ++++++++++++++++--------------- 1 file changed, 52 insertions(+), 51 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index 6e3b889a1973d..28953fc5edac6 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -89,9 +89,9 @@ namespace TMVA * * itDrop correlates with itSourceBegin */ - template + template void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, - int weightIndex, std::vector& weightBucket, + int itWeight, std::vector& weightBucket, ItTarget itTargetBegin, ItTarget itTargetEnd, ItDrop itDrop) { @@ -100,8 +100,8 @@ namespace TMVA for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget) { if (*itDrop) - (*itTarget) += (*itSource) * (weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]); - ++weightIndex; + (*itTarget) += (*itSource) * (weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]); + ++itWeight; } ++itDrop; } @@ -113,17 +113,17 @@ namespace TMVA * * */ - template + template void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, - int weightIndex, std::vector& weightBucket, + int itWeight, std::vector& weightBucket, ItTarget itTargetBegin, ItTarget itTargetEnd) { for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource) { for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget) { - (*itTarget) += (*itSource) * (weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]); - ++weightIndex; + (*itTarget) += (*itSource) * (weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]); + ++itWeight; } } } @@ -135,17 +135,17 @@ namespace TMVA * * */ - template + template void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, - int weightIndex, std::vector& weightBucket, + int itWeight, std::vector& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd) { for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev) { for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; 
++itCurr) { - (*itPrev) += (*itCurr) * (weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]); - ++weightIndex; + (*itPrev) += (*itCurr) * (weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]); + ++itWeight; } } } @@ -156,9 +156,9 @@ namespace TMVA * * itDrop correlates with itPrev (to be in agreement with "applyWeights" where it correlates with itSources (same node as itTarget here in applyBackwards) */ - template + template void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, - int weightIndex, std::vector& weightBucket, + int itWeight, std::vector& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop) { @@ -167,8 +167,8 @@ namespace TMVA for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr) { if (*itDrop) - (*itPrev) += (*itCurr) * (weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]); - ++weightIndex; + (*itPrev) += (*itCurr) * (weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]); + ++itWeight; } ++itDrop; } @@ -200,16 +200,16 @@ namespace TMVA * * */ - template - void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient) + template + void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, int itGradient, std::vector& gradientBucket) { while (itValue != itValueEnd) { auto& value = (*itValue); value = (*fnc.get ()) (value); - (*itGradient) = (*invFnc.get ()) (value); + (gradientBucket[XXH32(seed, itGradient) % BUCKET_SIZE]) = (*invFnc.get ()) (value); - ++itValue; ++itGradient; + ++itValue; ++gradientIndex; } } @@ -221,17 +221,18 @@ namespace TMVA */ template void update (ItSource itSource, ItSource itSourceEnd, - ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, - ItTargetGradient itTargetGradientBegin, - ItGradient itGradient) + int itTargetDeltaBegin, int itTargetDeltaEnd, + int itTargetGradientBegin, + int itGradient, + std::vector& gradientBucket) { while (itSource != itSourceEnd) { - auto itTargetDelta = itTargetDeltaBegin; - auto itTargetGradient = itTargetGradientBegin; - while (itTargetDelta != itTargetDeltaEnd) + int itTargetDelta = itTargetDeltaBegin; + int itTargetGradient = itTargetGradientBegin; + while (targetDeltaIndex != targetDeltaEnd) { - (*itGradient) += - (*itTargetDelta) * (*itSource) * (*itTargetGradient); + (gradientBucket[XXH32(seed, itGradient) % BUCKET_SIZE]) += - (gradientBucket[XXH32(seed, itTargetDelta) % BUCKET_SIZE]) * (*itSource) * (gradientBucket[XXH32(seed, itTargetGradient) % BUCKET_SIZE]); ++itTargetDelta; ++itTargetGradient; ++itGradient; } ++itSource; @@ -278,7 +279,7 @@ namespace TMVA ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient, - int weightIndex, std::vector& weightBucket, double weightDecay) + int itWeight, std::vector& weightBucket, double weightDecay) { // ! 
the factor weightDecay has to be already scaled by 1/n where n is the number of weights while (itSource != itSourceEnd) @@ -287,8 +288,8 @@ namespace TMVA auto itTargetGradient = itTargetGradientBegin; while (itTargetDelta != itTargetDeltaEnd) { - (*itGradient) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization(weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE],weightDecay); - ++itTargetDelta; ++itTargetGradient; ++itGradient; ++weightIndex; + (*itGradient) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization(weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE],weightDecay); + ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight; } ++itSource; } @@ -307,7 +308,7 @@ namespace TMVA * * Can be used with multithreading (i.e. "HogWild!" style); see call in trainCycle */ - template + template double Steepest::operator() (Function& fitnessFunction, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, PassThrough& passThrough) { size_t numWeights = nextLayerWeightIndex - currLayerWeightIndex + 1; @@ -318,7 +319,7 @@ namespace TMVA if (m_prevGradients.size () != numWeights) { m_prevGradients.clear (); - m_prevGradients.assign (nextLayerWeightIndex - currLayerWeightIndex + 1, 0); + m_prevGradients.assign (numWeights, 0); } bool success = true; @@ -334,15 +335,15 @@ namespace TMVA // apply momentum before computing the new gradient auto itPrevG = begin (m_prevGradients); auto itPrevGEnd = end (m_prevGradients); - int locWeightIndex = currLayerWeightIndex; + int itLocWeight = currLayerWeightIndex; for (; itPrevG != itPrevGEnd; ++itPrevG) { (*itPrevG) *= m_beta; - (localWeightBucket[XXH32(seed, locWeightIndex) % BUCKET_SIZE]) += (*itPrevG); - ++locWeightIndex; + (localWeightBucket[XXH32(seed, itLocWeight) % BUCKET_SIZE]) += (*itPrevG); + ++itLocWeight; } - E = fitnessFunction (passThrough, localWeights, gradients); + E = fitnessFunction (passThrough, localWeights, gradients); //************** Edit this later *************** // plotGradients (gradients); double alpha = gaussDouble (m_alpha, m_alpha/2.0); @@ -371,7 +372,7 @@ namespace TMVA { m_alpha /= 2; std::cout << "\nlearning rate reduced to " << m_alpha << std::endl; - std::for_each (weights.begin (), weights.end (), [maxGrad](double& w) + std::for_each (weights.begin(), weights.end(), [maxGrad](double& w) //************ Edit this later *************** { w /= maxGrad; }); @@ -379,11 +380,11 @@ namespace TMVA } else { - int weightIndex = currLayerWeightIndex; - std::for_each (std::begin (gradients), std::end (gradients), [&weightIndex](double& g) + int itWeight = currLayerWeightIndex; + std::for_each (std::begin (gradients), std::end (gradients), [&itWeight](double& g) { - weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE] += g; - ++weightIndex; + weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE] += g; + ++itWeight; }); } @@ -537,10 +538,10 @@ namespace TMVA // weight decay (regularization) double w = 0; size_t n = 0; - int weightIndex; - for (weightIndex = currLayerWeightIndex; weightIndex != nextLayerWeightIndex; ++weightIndex, ++n) + int itWeight; + for (itWeight = currLayerWeightIndex; itWeight != nextLayerWeightIndex; ++itWeight, ++n) { - double weight = (weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]); + double weight = (weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]); w += std::fabs (weight); } return error + 0.5 * w * factorWeightDecay / n; @@ -550,9 +551,9 @@ namespace TMVA // weight decay (regularization) double w = 0; size_t n = 0; - 
for (weightIndex = currLayerWeightIndex; weightIndex != nextLayerWeightIndex; ++weightIndex, ++n) + for (itWeight = currLayerWeightIndex; itWeight != nextLayerWeightIndex; ++itWeight, ++n) { - double weight = (weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]); + double weight = (weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]); w += weight*weight; } return error + 0.5 * w * factorWeightDecay / n; @@ -570,7 +571,7 @@ namespace TMVA - +// ********* Edit this later *************** @@ -707,7 +708,7 @@ namespace TMVA if (drops.empty () || weights.empty ()) //Edit this later. ***************** return; - int weightIndex = currLayerWeightIndex; + int itWeight = currLayerWeightIndex; auto itDrop = std::begin (drops); auto itDropEnd = std::end (drops); size_t numNodesPrev = inputSize (); @@ -733,11 +734,11 @@ namespace TMVA size_t _numWeights = layer.numWeights (numNodesPrev); for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight) { - if (weightIndex == nextLayerWeightIndex) + if (itWeight == nextLayerWeightIndex) break; - *(weightBucket[XXH32(seed, weightIndex) % BUCKET_SIZE]) *= p; - ++weightIndex; + *(weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]) *= p; + ++itWeight; } numNodesPrev = numNodes; dropFractionPrev = dropFraction; From 4ec89179c9db077d65f020779e244ff68e957d54 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Mon, 18 Jul 2016 19:04:22 +0530 Subject: [PATCH 14/42] Updated train() with HashedNets --- tmva/tmva/inc/TMVA/NeuralNet.icc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index 28953fc5edac6..d5fdb01dc5592 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -760,7 +760,7 @@ namespace TMVA * \param settings the settings for the training (e.g. multithreading or not, regularization etc.) 
*/ template - double Net::train (std::vector& weightBucket, + double Net::train (int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, std::vector& trainPattern, const std::vector& testPattern, Minimizer& minimizer, Settings& settings) @@ -815,7 +815,7 @@ namespace TMVA } // execute training cycle - trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer); + trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer); // *********** Edit this after trainCycle() *********** // check if we execute a test @@ -824,7 +824,7 @@ namespace TMVA { if (isWeightsForDrop) { - dropOutWeightFactor (weightBucket, dropFractions); + dropOutWeightFactor (currLayerWeightIndex, nextLayerWeightIndex, weightBucket, dropFractions, false); isWeightsForDrop = false; } @@ -854,7 +854,7 @@ namespace TMVA { std::vector localOutput; std::tuple passThrough (settings, batch, dropContainerTest); - double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput); + double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput); // *********** Edit this after trainCycle() *********** return std::make_tuple (testBatchError, localOutput); }) ); @@ -870,7 +870,7 @@ namespace TMVA auto it = begin (testPattern); for (double out : output) { - settings.testSample (0, out, (*it).output ().at (0), (*it).weight ()); + settings.testSample (0, out, (*it).output ().at (0), (*it).weight ()); //*********** Edit this later *********** ++it; } } @@ -883,11 +883,11 @@ namespace TMVA for (auto it = begin (testPattern), itEnd = end (testPattern); it != itEnd; ++it) { const Pattern& p = (*it); - double weight = p.weight (); + double weight = p.weight (); //*********** Edit this after trainCycle() *********** Batch batch (it, it+1); output.clear (); std::tuple passThrough (settings, batch, dropContainerTest); - double testPatternError = (*this) (passThrough, weightBucket, ModeOutput::FETCH, output); + double testPatternError = (*this) (passThrough, weightBucket, ModeOutput::FETCH, output); // *********** Edit this after trainCycle() *********** if (output.size () == 1) { /* std::vector out = (*this).compute (p.input (), weightBucket); */ @@ -908,7 +908,7 @@ namespace TMVA hasConverged = settings.hasConverged (testError); if (!hasConverged && !isWeightsForDrop) { - dropOutWeightFactor (weightBucket, dropFractions, true); // inverse + dropOutWeightFactor (currLayerWeightIndex, nextLayerWeightIndex, weightBucket, dropFractions, true); // inverse isWeightsForDrop = true; } } From b1caf73121aeadb2791dfbec359e853c9e4960d0 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Tue, 23 Aug 2016 19:20:20 +0530 Subject: [PATCH 15/42] Updated with complete HashedNets --- tmva/tmva/inc/TMVA/NeuralNet.icc | 1158 ++++++++++++++++-------------- 1 file changed, 613 insertions(+), 545 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index d5fdb01dc5592..c49dd1e7962df 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -8,26 +8,21 @@ #include "Math/Util.h" -#include "xxhash.h" - +#include namespace TMVA { namespace DNN { + +/*! \brief Hash initialization + * + * + */ + std::hash hasherFunction; + // --------------------------------------------------------------------------------- -/*! 
\brief xxHash initializations - * - * - */ - XXH32_state_t state; - int seed = 0x9747b28c; // Used to initialize the hash value, use whatever - // value you want, but always the same. - XXH32_reset(&state, seed); - - int BUCKET_SIZE = 8; @@ -85,11 +80,11 @@ namespace TMVA -/*! \brief apply weights using drop-out +/*! \brief apply weights using drop-out; for no drop out, provide (&bool = true) to itDrop such that *itDrop becomes "true" * * itDrop correlates with itSourceBegin */ - template +template void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, int itWeight, std::vector& weightBucket, ItTarget itTargetBegin, ItTarget itTargetEnd, @@ -99,81 +94,43 @@ namespace TMVA { for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget) { - if (*itDrop) - (*itTarget) += (*itSource) * (weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]); + if (!HasDropOut || *itDrop) + (*itTarget) += (*itSource) * (weightBucket[hasherFunction(itWeight) % BUCKET_SIZE]); ++itWeight; } - ++itDrop; + if (HasDropOut) ++itDrop; } } -/*! \brief apply weights without drop-out - * - * - */ - template - void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, - int itWeight, std::vector& weightBucket, - ItTarget itTargetBegin, ItTarget itTargetEnd) - { - for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource) - { - for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget) - { - (*itTarget) += (*itSource) * (weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]); - ++itWeight; - } - } - } - -/*! \brief apply weights backwards (for backprop) +/*! \brief apply weights backwards (for backprop); for no drop out, provide (&bool = true) to itDrop such that *itDrop becomes "true" * - * + * itDrop correlates with itPrev (to be in agreement with "applyWeights" where it correlates with itSources (same node as itTarget here in applyBackwards) */ - template +template void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, - ItPrev itPrevBegin, ItPrev itPrevEnd) + ItPrev itPrevBegin, ItPrev itPrevEnd, + ItDrop itDrop) { for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev) { for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr) { - (*itPrev) += (*itCurr) * (weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]); + if (!HasDropOut || *itDrop) + (*itPrev) += (*itCurr) * (weightBucket[hasherFunction(itWeight) % BUCKET_SIZE]); ++itWeight; } + if (HasDropOut) ++itDrop; } } -/*! 
\brief apply weights backwards (for backprop) - * - * itDrop correlates with itPrev (to be in agreement with "applyWeights" where it correlates with itSources (same node as itTarget here in applyBackwards) - */ - template - void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, - int itWeight, std::vector& weightBucket, - ItPrev itPrevBegin, ItPrev itPrevEnd, - ItDrop itDrop) - { - for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev) - { - for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr) - { - if (*itDrop) - (*itPrev) += (*itCurr) * (weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]); - ++itWeight; - } - ++itDrop; - } - } - @@ -200,16 +157,16 @@ namespace TMVA * * */ - template + template void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, int itGradient, std::vector& gradientBucket) { while (itValue != itValueEnd) { auto& value = (*itValue); value = (*fnc.get ()) (value); - (gradientBucket[XXH32(seed, itGradient) % BUCKET_SIZE]) = (*invFnc.get ()) (value); + (gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) = (*invFnc.get ()) (value); - ++itValue; ++gradientIndex; + ++itValue; ++itGradient; } } @@ -221,7 +178,7 @@ namespace TMVA */ template void update (ItSource itSource, ItSource itSourceEnd, - int itTargetDeltaBegin, int itTargetDeltaEnd, + int itTargetDeltaBegin, int itTargetDeltaEnd, std::vector& deltaBucket, int itTargetGradientBegin, int itGradient, std::vector& gradientBucket) @@ -230,9 +187,9 @@ namespace TMVA { int itTargetDelta = itTargetDeltaBegin; int itTargetGradient = itTargetGradientBegin; - while (targetDeltaIndex != targetDeltaEnd) + while (itTargetDelta != itTargetDeltaEnd) { - (gradientBucket[XXH32(seed, itGradient) % BUCKET_SIZE]) += - (gradientBucket[XXH32(seed, itTargetDelta) % BUCKET_SIZE]) * (*itSource) * (gradientBucket[XXH32(seed, itTargetGradient) % BUCKET_SIZE]); + (gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) += - (deltaBucket[hasherFunction(itTargetDelta) % BUCKET_SIZE]) * (*itSource) * (gradientBucket[hasherFunction(itTargetGradient) % BUCKET_SIZE]); ++itTargetDelta; ++itTargetGradient; ++itGradient; } ++itSource; @@ -276,19 +233,19 @@ namespace TMVA */ template void update (ItSource itSource, ItSource itSourceEnd, - ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, - ItTargetGradient itTargetGradientBegin, - ItGradient itGradient, + int itTargetDeltaBegin, int itTargetDeltaEnd, std::vector& deltaBucket, + int itTargetGradientBegin, + int itGradient, std::vector& gradientBucket, int itWeight, std::vector& weightBucket, double weightDecay) { // ! 
the factor weightDecay has to be already scaled by 1/n where n is the number of weights while (itSource != itSourceEnd) { - auto itTargetDelta = itTargetDeltaBegin; - auto itTargetGradient = itTargetGradientBegin; + int itTargetDelta = itTargetDeltaBegin; + int itTargetGradient = itTargetGradientBegin; while (itTargetDelta != itTargetDeltaEnd) { - (*itGradient) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization(weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE],weightDecay); + gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE] -= + deltaBucket[hasherFunction(itTargetDelta) % BUCKET_SIZE] * (*itSource) * gradientBucket[hasherFunction(itTargetGradient) % BUCKET_SIZE] + computeRegularization(weightBucket[hasherFunction(itWeight) % BUCKET_SIZE],weightDecay); ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight; } ++itSource; @@ -308,18 +265,18 @@ namespace TMVA * * Can be used with multithreading (i.e. "HogWild!" style); see call in trainCycle */ - template - double Steepest::operator() (Function& fitnessFunction, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, PassThrough& passThrough) + template + double Steepest::operator() (Function& fitnessFunction, std::vector& weightBucket, std::vector& gradientBucket, PassThrough& passThrough) { - size_t numWeights = nextLayerWeightIndex - currLayerWeightIndex + 1; - std::vector gradients (numWeights, 0.0); + size_t numWeights = weightBucket.size () * BUCKET_SIZE; + std::vector gradients (numWeights, 0.0); std::vector localWeightBucket (begin (weightBucket), end (weightBucket)); double E = 1e10; - if (m_prevGradients.size () != numWeights) + if (m_prevGradientBucket.size () != weightBucket.size ()) { - m_prevGradients.clear (); - m_prevGradients.assign (numWeights, 0); + m_prevGradientBucket.clear (); + m_prevGradientBucket.assign (weightBucket.size (), 0); } bool success = true; @@ -333,36 +290,36 @@ namespace TMVA // --- nesterov momentum --- // apply momentum before computing the new gradient - auto itPrevG = begin (m_prevGradients); - auto itPrevGEnd = end (m_prevGradients); - int itLocWeight = currLayerWeightIndex; - for (; itPrevG != itPrevGEnd; ++itPrevG) + int itPrevG = 0; + int itPrevGEnd = numWeights - 1; + int itLocWeight = 0; + for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight) { - (*itPrevG) *= m_beta; - (localWeightBucket[XXH32(seed, itLocWeight) % BUCKET_SIZE]) += (*itPrevG); - ++itLocWeight; + (m_prevGradientBucket[hasherFunction(itPrevG) % BUCKET_SIZE]) *= m_beta; + (localWeightBucket[hasherFunction(itLocWeight) % BUCKET_SIZE]) += (m_prevGradientBucket[hasherFunction(itPrevG) % BUCKET_SIZE]); } - E = fitnessFunction (passThrough, localWeights, gradients); //************** Edit this later *************** + E = fitnessFunction (passThrough, localWeights, gradients); // ************************** // plotGradients (gradients); +// plotWeights (localWeightBucket); double alpha = gaussDouble (m_alpha, m_alpha/2.0); // double alpha = m_alpha; - auto itG = begin (gradients); - auto itGEnd = end (gradients); - itPrevG = begin (m_prevGradients); + int itG = 0; + int itGEnd = numWeights - 1; + itPrevG = 0; double maxGrad = 0.0; for (; itG != itGEnd; ++itG, ++itPrevG) { - double currGrad = (*itG); - double prevGrad = (*itPrevG); + double currGrad = (gradientBucket[hasherFunction(itG) % BUCKET_SIZE]); + double prevGrad = (m_prevGradientBucket[hasherFunction(itPrevG) % BUCKET_SIZE]); currGrad *= alpha; //(*itPrevG) = m_beta * (prevGrad + currGrad); currGrad += prevGrad; 
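// [Editorial sketch, not part of the committed diff] Every bucket access in this minimizer
// follows the same HashedNets pattern: the i-th "virtual" weight or gradient is not stored
// individually but looked up as bucket[hash(i) % BUCKET_SIZE], so for example a 100x100
// fully connected layer with 10,000 virtual weights is backed by only BUCKET_SIZE shared
// parameters, with roughly 10,000 / BUCKET_SIZE virtual weights colliding in each slot.
// A minimal illustration with hypothetical helper names, assuming std::hash<int>:
//
//   #include <functional>
//   #include <vector>
//
//   static const int BUCKET_SIZE = 8;
//   static std::hash<int> hashIndex;
//
//   inline double hashedWeight (const std::vector<double>& weightBucket, int virtualIndex)
//   {
//      return weightBucket[hashIndex (virtualIndex) % BUCKET_SIZE];
//   }
//
//   // all virtual weights that map to the same slot share one parameter, so the gradient
//   // contributions of the colliding weights accumulate in that one slot:
//   inline void addHashedGradient (std::vector<double>& gradientBucket, int virtualIndex, double g)
//   {
//      gradientBucket[hashIndex (virtualIndex) % BUCKET_SIZE] += g;
//   }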
- (*itG) = currGrad; - (*itPrevG) = currGrad; + (gradientBucket[hasherFunction(itG) % BUCKET_SIZE]) = currGrad; + (m_prevGradientBucket[hasherFunction(itPrevG) % BUCKET_SIZE]) = currGrad; if (std::fabs (currGrad) > maxGrad) maxGrad = currGrad; @@ -372,19 +329,19 @@ namespace TMVA { m_alpha /= 2; std::cout << "\nlearning rate reduced to " << m_alpha << std::endl; - std::for_each (weights.begin(), weights.end(), [maxGrad](double& w) //************ Edit this later *************** + std::for_each (weightBucket.begin (), weightBucket.end (), [maxGrad](double& w) { w /= maxGrad; }); - m_prevGradients.clear (); + m_prevGradientBucket.clear (); } else { - int itWeight = currLayerWeightIndex; - std::for_each (std::begin (gradients), std::end (gradients), [&itWeight](double& g) + int itW = 0; + std::for_each (gradientBucket.begin (), gradientBucket.end (), [&itW](double& g) { - weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE] += g; - ++itWeight; + weightBucket[hasherFunction(itW) % BUCKET_SIZE] += g; + ++itW; }); } @@ -410,12 +367,14 @@ namespace TMVA + + /*! \brief sum of squares error function * * */ template - double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight) + double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, InvFnc invFnc, double patternWeight) { double errorSum = 0.0; @@ -429,7 +388,7 @@ namespace TMVA double error = output - (*itTruth); if (hasDeltas) { - (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight; + (deltaBucket[hasherFunction(itTargetDelta) % BUCKET_SIZE]) = (*invFnc.get ()) (output) * error * patternWeight; ++itDelta; } errorSum += error*error * patternWeight; @@ -445,7 +404,7 @@ namespace TMVA * */ template - double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight) + double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, ItInvActFnc /*itInvActFnc*/, double patternWeight) { bool hasDeltas = (itDelta != itDeltaEnd); @@ -460,8 +419,8 @@ namespace TMVA if (hasDeltas) { double delta = probability - truth; - (*itDelta) = delta*patternWeight; -// (*itDelta) = (*itInvActFnc)(probability) * delta * patternWeight; + (deltaBucket[hasherFunction(itTargetDelta) % BUCKET_SIZE]) = delta*patternWeight; +// (deltaBucket[hasherFunction(itTargetDelta) % BUCKET_SIZE]) = (*itInvActFnc)(probability) * delta * patternWeight; ++itDelta; } double error (0); @@ -491,7 +450,7 @@ namespace TMVA * */ template - double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight) + double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, ItInvActFnc /*itInvActFnc*/, double patternWeight) { double errorSum = 0.0; @@ -505,8 +464,8 @@ namespace TMVA double truth = (*itTruth); if (hasDeltas) { - (*itDelta) = probability - truth; -// (*itDelta) = (*itInvActFnc)(sm) * delta * patternWeight; 
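// [Editorial sketch, not part of the committed diff] The error functions above now write the
// per-output deltas through the same hash into a shared deltaBucket instead of a dense delta
// vector. A reduced sum-of-squares variant, reusing hashIndex and BUCKET_SIZE from the sketch
// above and taking the activation derivative as a plain callable; names are illustrative:
//
//   template <typename ItOutput, typename ItTruth, typename InvFnc>
//   double sumOfSquaresHashed (ItOutput itOutput, ItOutput itOutputEnd, ItTruth itTruth,
//                              int itDelta, std::vector<double>& deltaBucket,
//                              InvFnc invFnc, double patternWeight)
//   {
//      double errorSum = 0.0;
//      for (; itOutput != itOutputEnd; ++itOutput, ++itTruth, ++itDelta)
//      {
//         double error = (*itOutput) - (*itTruth);
//         deltaBucket[hashIndex (itDelta) % BUCKET_SIZE] = invFnc (*itOutput) * error * patternWeight;
//         errorSum += error * error * patternWeight;
//      }
//      return errorSum;
//   }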
+ (deltaBucket[hasherFunction(itTargetDelta) % BUCKET_SIZE]) = probability - truth; +// (deltaBucket[hasherFunction(itTargetDelta) % BUCKET_SIZE]) = (*itInvActFnc)(sm) * delta * patternWeight; ++itDelta; //++itInvActFnc; } double error (0); @@ -541,7 +500,7 @@ namespace TMVA int itWeight; for (itWeight = currLayerWeightIndex; itWeight != nextLayerWeightIndex; ++itWeight, ++n) { - double weight = (weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]); + double weight = (weightBucket[hasherFunction(itWeight) % BUCKET_SIZE]); w += std::fabs (weight); } return error + 0.5 * w * factorWeightDecay / n; @@ -551,9 +510,10 @@ namespace TMVA // weight decay (regularization) double w = 0; size_t n = 0; + int itWeight; for (itWeight = currLayerWeightIndex; itWeight != nextLayerWeightIndex; ++itWeight, ++n) { - double weight = (weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]); + double weight = (weightBucket[hasherFunction(itWeight) % BUCKET_SIZE]); w += weight*weight; } return error + 0.5 * w * factorWeightDecay / n; @@ -571,76 +531,61 @@ namespace TMVA -// ********* Edit this later *************** -/*! \brief apply the weights in forward direction of the DNN - * - * - */ - template - void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData) - { - if (prevLayerData.hasDropOut ()) - { - applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.weightsBegin (), - currLayerData.valuesBegin (), currLayerData.valuesEnd (), - prevLayerData.dropOut ()); - } - else - { - applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.weightsBegin (), - currLayerData.valuesBegin (), currLayerData.valuesEnd ()); - } - } -/*! \brief apply weights (and functions) in forward direction and compute the gradients +/*! \brief apply the weights (and functions) in forward direction of the DNN * * */ template - void forward_training (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData) + void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket) { if (prevLayerData.hasDropOut ()) { - applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.weightsBegin (), + applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), + currLayerData.weightsBegin (), weightBucket, currLayerData.valuesBegin (), currLayerData.valuesEnd (), prevLayerData.dropOut ()); } else { - applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.weightsBegin (), - currLayerData.valuesBegin (), currLayerData.valuesEnd ()); + bool dummy = true; + applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), + currLayerData.weightsBegin (), weightBucket, + currLayerData.valuesBegin (), currLayerData.valuesEnd (), + &dummy); // dummy to turn on all nodes (no drop out) } } + /*! 
\brief backward application of the weights (back-propagation of the error) * * */ - template - void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData) - { - if (prevLayerData.hasDropOut ()) - { - applyWeightsBackwards (currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.weightsBegin (), - prevLayerData.deltasBegin (), prevLayerData.deltasEnd (), - prevLayerData.dropOut ()); - } - else - { - applyWeightsBackwards (currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.weightsBegin (), - prevLayerData.deltasBegin (), prevLayerData.deltasEnd ()); - } - } +template + void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket) +{ + if (prevLayerData.hasDropOut ()) + { + applyWeightsBackwards (currLayerData.deltasBegin (), currLayerData.deltasEnd (), + currLayerData.weightsBegin (), weightBucket, + prevLayerData.deltasBegin (), prevLayerData.deltasEnd (), + prevLayerData.dropOut ()); + } + else + { + bool dummy = true; + applyWeightsBackwards (currLayerData.deltasBegin (), currLayerData.deltasEnd (), + currLayerData.weightsBegin (), weightBucket, + prevLayerData.deltasBegin (), prevLayerData.deltasEnd (), + &dummy); // dummy to use all nodes (no drop out) + } +} + + @@ -701,14 +646,15 @@ namespace TMVA * this function computes the factor and applies it to the weights */ template - void Net::dropOutWeightFactor (int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, + void Net::dropOutWeightFactor (std::vector& weightBucket, const DropProbabilities& drops, bool inverse) { - if (drops.empty () || weights.empty ()) //Edit this later. ***************** + if (drops.empty () || weightBucket.empty ()) return; - int itWeight = currLayerWeightIndex; + int itWeight = 0; + int itWeightEnd = weightBucket.size() * BUCKET_SIZE; auto itDrop = std::begin (drops); auto itDropEnd = std::end (drops); size_t numNodesPrev = inputSize (); @@ -720,7 +666,7 @@ namespace TMVA if (itDrop == itDropEnd) break; - size_t numNodes = layer.numNodes (); + size_t _numNodes = layer.numNodes (); double dropFraction = *itDrop; double pPrev = 1.0 - dropFractionPrev; @@ -734,13 +680,13 @@ namespace TMVA size_t _numWeights = layer.numWeights (numNodesPrev); for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight) { - if (itWeight == nextLayerWeightIndex) + if (itWeight == itWeightEnd) break; - *(weightBucket[XXH32(seed, itWeight) % BUCKET_SIZE]) *= p; + weightBucket[hasherFunction(itWeight) % BUCKET_SIZE] *= p; ++itWeight; } - numNodesPrev = numNodes; + numNodesPrev = _numNodes; dropFractionPrev = dropFraction; ++itDrop; } @@ -760,10 +706,11 @@ namespace TMVA * \param settings the settings for the training (e.g. multithreading or not, regularization etc.) 
*/ template - double Net::train (int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, + double Net::train (std::vector& weightBucket, std::vector& trainPattern, const std::vector& testPattern, - Minimizer& minimizer, Settings& settings) + Minimizer& minimizer, + Settings& settings) { // std::cout << "START TRAINING" << std::endl; settings.startTrainCycle (); @@ -783,7 +730,11 @@ namespace TMVA const std::vector& dropFractions = settings.dropFractions (); bool isWeightsForDrop = false; - + std::vector& gradientBucket; + std::vector& deltaBucket; + + initializeGradientsDeltas(std::back_inserter (gradientBucket), std::back_inserter (deltaBucket)); + // until convergence do { @@ -795,39 +746,40 @@ namespace TMVA { // fill the dropOut-container dropContainer.clear (); - size_t numNodes = inputSize (); + size_t _numNodes = inputSize (); double dropFraction = 0.0; dropFraction = dropFractions.at (dropIndex); ++dropIndex; - fillDropContainer (dropContainer, dropFraction, numNodes); + fillDropContainer (dropContainer, dropFraction, _numNodes); for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex) { auto& layer = *itLayer; - numNodes = layer.numNodes (); + _numNodes = layer.numNodes (); // how many nodes have to be dropped dropFraction = 0.0; if (dropFractions.size () > dropIndex) dropFraction = dropFractions.at (dropIndex); - fillDropContainer (dropContainer, dropFraction, numNodes); + fillDropContainer (dropContainer, dropFraction, _numNodes); } isWeightsForDrop = true; } // execute training cycle - trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer); // *********** Edit this after trainCycle() *********** + trainError = trainCycle (minimizer, weightBucket, begin (trainPattern), end (trainPattern), settings, dropContainer); - // check if we execute a test + // ------ check if we have to execute a test ------------------ bool hasConverged = false; - if (testCycleCount % settings.testRepetitions () == 0) + if (testCycleCount % settings.testRepetitions () == 0) // we test only everye "testRepetitions" repetition { if (isWeightsForDrop) { - dropOutWeightFactor (currLayerWeightIndex, nextLayerWeightIndex, weightBucket, dropFractions, false); + dropOutWeightFactor (weightBucket, dropFractions); isWeightsForDrop = false; } + testError = 0; //double weightSum = 0; settings.startTestCycle (); @@ -837,13 +789,14 @@ namespace TMVA size_t patternPerThread = testPattern.size () / numThreads; std::vector batches; auto itPat = testPattern.begin (); - auto itPatEnd = testPattern.end (); + // auto itPatEnd = testPattern.end (); for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread) { batches.push_back (Batch (itPat, itPat + patternPerThread)); itPat += patternPerThread; } - batches.insert (batches.end (), Batch (itPat, itPatEnd)); + if (itPat != testPattern.end ()) + batches.push_back (Batch (itPat, testPattern.end ())); std::vector>>> futures; for (auto& batch : batches) @@ -853,62 +806,68 @@ namespace TMVA std::async (std::launch::async, [&]() { std::vector localOutput; - std::tuple passThrough (settings, batch, dropContainerTest); - double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput); // *********** Edit this after trainCycle() *********** + pass_through_type passThrough (settings, batch, dropContainerTest); + double testBatchError = (*this) (passThrough, weightBucket, gradientBucket, ModeOutput::FETCH, localOutput); 
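// [Editorial sketch, not part of the committed diff] The surrounding hunk splits the test set
// into one Batch per thread, evaluates each batch in a std::async task, and now only keeps a
// trailing remainder batch when patterns are actually left over (instead of unconditionally
// inserting a final batch). A reduced stand-alone illustration of that splitting logic with
// plain random-access iterators; the names are illustrative:
//
//   #include <cstddef>
//   #include <iterator>
//   #include <utility>
//   #include <vector>
//
//   template <typename It>
//   std::vector<std::pair<It, It>> splitIntoBatches (It begin, It end, std::size_t numThreads)
//   {
//      std::vector<std::pair<It, It>> batches;
//      std::size_t total = static_cast<std::size_t> (std::distance (begin, end));
//      std::size_t perThread = total / numThreads;
//      It it = begin;
//      for (std::size_t i = 0; i + 1 < numThreads; ++i)   // numThreads-1 equally sized chunks
//      {
//         batches.emplace_back (it, it + perThread);
//         it += perThread;
//      }
//      if (it != end)                                      // remainder only if non-empty
//         batches.emplace_back (it, end);
//      return batches;
//   }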
return std::make_tuple (testBatchError, localOutput); }) ); } + auto itBatch = batches.begin (); for (auto& f : futures) { std::tuple> result = f.get (); testError += std::get<0>(result) / batches.size (); std::vector output = std::get<1>(result); - if (output.size () == testPattern.size ()) + + //if (output.size () == testPattern.size ()) { - auto it = begin (testPattern); + //auto it = begin (testPattern); + auto it = (*itBatch).begin (); for (double out : output) { - settings.testSample (0, out, (*it).output ().at (0), (*it).weight ()); //*********** Edit this later *********** + settings.testSample (0, out, (*it).output ().at (0), (*it).weight ()); ++it; } } + ++itBatch; } } else { std::vector output; - for (auto it = begin (testPattern), itEnd = end (testPattern); it != itEnd; ++it) + //for (auto it = begin (testPattern), itEnd = end (testPattern); it != itEnd; ++it) { - const Pattern& p = (*it); - double weight = p.weight (); //*********** Edit this after trainCycle() *********** - Batch batch (it, it+1); + //const Pattern& p = (*it); + //double weight = p.weight (); + //Batch batch (it, it+1); + Batch batch (begin (testPattern), end (testPattern)); output.clear (); - std::tuple passThrough (settings, batch, dropContainerTest); - double testPatternError = (*this) (passThrough, weightBucket, ModeOutput::FETCH, output); // *********** Edit this after trainCycle() *********** - if (output.size () == 1) + pass_through_type passThrough (settings, batch, dropContainerTest); + double testPatternError = (*this) (passThrough, weightBucket, ModeOutput::FETCH, output); + + auto it = batch.begin (); + for (double out : output) { - /* std::vector out = (*this).compute (p.input (), weightBucket); */ - /* assert (output.at (0) == out.at (0)); */ - settings.testSample (testPatternError, output.at (0), p.output ().at (0), weight); + settings.testSample (0, out, (*it).output ().at (0), (*it).weight ()); + ++it; } //weightSum += fabs (weight); //testError += testPatternError*weight; - testError += testPatternError; + testError += testPatternError; /// batch.size (); } - testError /= testPattern.size (); + // testError /= testPattern.size (); } settings.endTestCycle (); // testError /= weightSum; - settings.computeResult (*this, weightBucket); + settings.computeResult (*this, weights); hasConverged = settings.hasConverged (testError); if (!hasConverged && !isWeightsForDrop) { - dropOutWeightFactor (currLayerWeightIndex, nextLayerWeightIndex, weightBucket, dropFractions, true); // inverse + dropOutWeightFactor (weightBucket, dropFractions, true); // inverse isWeightsForDrop = true; } } @@ -1025,8 +984,8 @@ namespace TMVA for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it) { Batch& batch = *it; - std::tuple settingsAndBatch (settings, batch, dropContainer); - localError += minimizer ((*this), weightBucket, settingsAndBatch); /// call the minimizer + pass_through_type settingsAndBatch (settings, batch, dropContainer); + localError += minimizer ((*this), weightBucket, gradientBucket, settingsAndBatch); /// call the minimizer } return localError; }) @@ -1041,7 +1000,7 @@ namespace TMVA for (auto& batch : batches) { std::tuple settingsAndBatch (settings, batch, dropContainer); - error += minimizer ((*this), weightBucket, settingsAndBatch); + error += minimizer ((*this), weightBucket, gradientBucket, settingsAndBatch); } } @@ -1062,15 +1021,17 @@ namespace TMVA * \param weights the weight data */ template - std::vector Net::compute (const std::vector& input, const Weights& weights) 
const + std::vector Net::compute (const std::vector& input, const std::vector& weightBucket) const { std::vector layerData; layerData.reserve (m_layers.size ()+1); - auto itWeight = begin (weights); + int itWeight = 0; auto itInputBegin = begin (input); auto itInputEnd = end (input); layerData.push_back (LayerData (itInputBegin, itInputEnd)); size_t numNodesPrev = input.size (); + + // -------------------- prepare layer data with one pattern ------------------------------- for (auto& layer: m_layers) { layerData.push_back (LayerData (layer.numNodes (), itWeight, @@ -1083,329 +1044,415 @@ namespace TMVA // --------- forward ------------- - size_t idxLayer = 0, idxLayerEnd = m_layers.size (); - for (; idxLayer < idxLayerEnd; ++idxLayer) - { - LayerData& prevLayerData = layerData.at (idxLayer); - LayerData& currLayerData = layerData.at (idxLayer+1); - - forward (prevLayerData, currLayerData); - applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ()); - } + forwardPattern (m_layers, layerData, weightBucket); // ------------- fetch output ------------------ - if (TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.back ().outputMode ())) - { std::vector output; - output.assign (layerData.back ().valuesBegin (), layerData.back ().valuesEnd ()); - return output; - } - std::vector output (layerData.back ().probabilities ()); + fetchOutput (layerData.back (), output); return output; } template - double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const + double Net::operator() (PassThrough& settingsAndBatch, const std::vector& weightBucket) const { std::vector nothing; // empty gradients; no backpropagation is done, just forward - assert (numWeights () == weights.size ()); - double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::begin (nothing), std::end (nothing), 10000, nothing, false); + assert (numWeights () == (weightBucket.size() * BUCKET_SIZE)); + double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, 0, 10000, nothing, false, weightBucket); return error; } template - double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput /*eFetch*/, OutContainer& outputContainer) const + double Net::operator() (PassThrough& settingsAndBatch, const std::vector& weightBucket, ModeOutput /*eFetch*/, OutContainer& outputContainer) const { std::vector nothing; // empty gradients; no backpropagation is done, just forward - assert (numWeights () == weights.size ()); - double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::begin (nothing), std::end (nothing), 10000, outputContainer, true); + assert (numWeights () == (weightBucket.size () * BUCKET_SIZE)); + double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, 0, 10000, outputContainer, true, weightBucket); return error; } template - double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, Gradients& gradients) const + double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket) const { std::vector nothing; - assert (numWeights () == weights.size ()); - assert (weights.size () == gradients.size ()); - double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::begin (gradients), std::end (gradients), 0, nothing, false); + assert (numWeights () == (weightBucket.size () * 
BUCKET_SIZE)); + assert ((weightBucket.size () * BUCKET_SIZE) == (gradientBucket.size () * BUCKET_SIZE)); + double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, (gradientBucket.size () * BUCKET_SIZE) - 1, 0, nothing, false, weightBucket); return error; } template - double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const + double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket, ModeOutput eFetch, OutContainer& outputContainer) const { MATH_UNUSED(eFetch); - assert (numWeights () == weights.size ()); - assert (weights.size () == gradients.size ()); - double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::begin (gradients), std::end (gradients), 0, outputContainer, true); + assert (numWeights () == weightBucket.size () * BUCKET_SIZE); + assert (weights.size () * BUCKET_SIZE == gradients.size () * BUCKET_SIZE); + double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, (gradientBucket.size () * BUCKET_SIZE) - 1, 0, outputContainer, true, weightBucket); return error; } + template + std::vector> Net::prepareLayerData (LayerContainer& _layers, + Batch& batch, + const DropContainer& dropContainer, + int itWeightBegin, + int /*itWeightEnd*/, + int itGradientBegin, + int itGradientEnd, + size_t& totalNumWeights) const + { + LayerData::const_dropout_iterator itDropOut; + bool usesDropOut = !dropContainer.empty (); + if (usesDropOut) + itDropOut = std::begin (dropContainer); + + if (_layers.empty ()) + throw std::string ("no layers in this net"); + + + // ----------- create layer data ------------------------------------------------------- + assert (_layers.back ().numNodes () == outputSize ()); + totalNumWeights = 0; + size_t totalNumNodes = 0; + std::vector> layerPatternData; + layerPatternData.reserve (_layers.size ()+1); + int itWeight = itWeightBegin; + int itGradient = itGradientBegin; + size_t numNodesPrev = inputSize (); + typename Pattern::const_iterator itInputBegin; + typename Pattern::const_iterator itInputEnd; + -/*! 
\brief forward propagation and backward propagation - * - * - */ - template - double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch, - ItWeight itWeightBegin, - ItGradient itGradientBegin, ItGradient itGradientEnd, - size_t trainFromLayer, - OutContainer& outputContainer, bool fetchOutput) const - { - Settings& settings = std::get<0>(settingsAndBatch); - Batch& batch = std::get<1>(settingsAndBatch); - DropContainer& dropContainer = std::get<2>(settingsAndBatch); - - bool doBatchNormalization = settings.doBatchNormalization (); - bool usesDropOut = !dropContainer.empty (); + // ItWeight itGammaBegin = itWeightBegin + numWeights (); + // ItWeight itBetaBegin = itWeightBegin + numWeights () + numNodes (); + // ItGradient itGradGammaBegin = itGradientBegin + numWeights (); + // ItGradient itGradBetaBegin = itGradientBegin + numWeights () + numNodes (); - LayerData::const_dropout_iterator itDropOut; - if (usesDropOut) - itDropOut = std::begin (dropContainer); - if (_layers.empty ()) - { - std::cout << "no layers in this net" << std::endl; - throw std::string ("no layers in this net"); - } + // --------------------- prepare layer data for input layer ---------------------------- + layerPatternData.push_back (std::vector()); + for (const Pattern& _pattern : batch) + { + std::vector& layerData = layerPatternData.back (); + layerData.push_back (LayerData (numNodesPrev)); + itInputBegin = _pattern.beginInput (); + itInputEnd = _pattern.endInput (); + layerData.back ().setInput (itInputBegin, itInputEnd); + + if (usesDropOut) + layerData.back ().setDropOut (itDropOut); - double sumError = 0.0; - double sumWeights = 0.0; // ------------- + } - // ----------- create layer data ------------------------------------------------------- - assert (_layers.back ().numNodes () == outputSize ()); - size_t totalNumWeights = 0; - std::vector> layerPatternData; - layerPatternData.reserve (_layers.size ()+1); - ItWeight itWeight = itWeightBegin; - ItGradient itGradient = itGradientBegin; - size_t numNodesPrev = inputSize (); - typename Pattern::const_iterator itInputBegin; - typename Pattern::const_iterator itInputEnd; + + if (usesDropOut) + itDropOut += _layers.back ().numNodes (); - // --------------------- prepare layer data for input layer ---------------------------- + // ---------------- prepare subsequent layers --------------------------------------------- + // for each of the layers + for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer) + { + bool isOutputLayer = (itLayer+1 == itLayerEnd); + bool isFirstHiddenLayer = (itLayer == begin (_layers)); + + auto& layer = *itLayer; layerPatternData.push_back (std::vector()); - layerPatternData.back () . 
reserve(batch.size()); + // for each pattern, prepare a layerData for (const Pattern& _pattern : batch) { std::vector& layerData = layerPatternData.back (); - layerData.push_back (LayerData (numNodesPrev)); + //layerData.push_back (LayerData (numNodesPrev)); + + if (itGradientBegin == itGradientEnd) + { + layerData.push_back (LayerData (layer.numNodes (), itWeight, + layer.activationFunction (), + layer.modeOutputValues ())); + } + else + { + layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient, + layer.activationFunction (), + layer.inverseActivationFunction (), + layer.modeOutputValues ())); + } - itInputBegin = _pattern.beginInput (); - itInputEnd = _pattern.endInput (); - layerData.back ().setInput (itInputBegin, itInputEnd); - if (usesDropOut) { layerData.back ().setDropOut (itDropOut); } + } + if (usesDropOut) { - itDropOut += _layers.back ().numNodes (); + itDropOut += layer.numNodes (); } + size_t _numWeights = layer.numWeights (numNodesPrev); + totalNumWeights += _numWeights; + itWeight += _numWeights; + itGradient += _numWeights; + numNodesPrev = layer.numNodes (); + totalNumNodes += numNodesPrev; - // ---------------- prepare subsequent layers --------------------------------------------- - // for each of the layers - for (auto& layer: _layers) - { - layerPatternData.push_back (std::vector()); - layerPatternData.back () . reserve(batch.size()); - // for each pattern, prepare a layerData - for (const Pattern& _pattern : batch) - { - std::vector& layerData = layerPatternData.back (); - //layerData.push_back (LayerData (numNodesPrev)); + } + assert (totalNumWeights > 0); + return layerPatternData; +} - if (itGradientBegin == itGradientEnd) - layerData.push_back (LayerData (layer.numNodes (), itWeight, - layer.activationFunction (), - layer.modeOutputValues ())); - else - layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient, - layer.activationFunction (), - layer.inverseActivationFunction (), - layer.modeOutputValues ())); - if (usesDropOut) - { - layerData.back ().setDropOut (itDropOut); - } - } - - if (usesDropOut) - { - itDropOut += layer.numNodes (); - } - size_t _numWeights = layer.numWeights (numNodesPrev); - totalNumWeights += _numWeights; - itWeight += _numWeights; - itGradient += _numWeights; - numNodesPrev = layer.numNodes (); - } - assert (totalNumWeights > 0); + template + void Net::forwardPattern (const LayerContainer& _layers, + std::vector& layerData, std::vector& weightBucket) const + { + size_t idxLayer = 0, idxLayerEnd = _layers.size (); + size_t cumulativeNodeCount = 0; + for (; idxLayer < idxLayerEnd; ++idxLayer) + { + LayerData& prevLayerData = layerData.at (idxLayer); + LayerData& currLayerData = layerData.at (idxLayer+1); + + forward (prevLayerData, currLayerData, weightBucket); + applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ()); + } + } - // ---------------------------------- loop over layers and pattern ------------------------------------------------------- - for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer) //std::vector& layerPattern : layerPatternData) - { - bool doTraining = idxLayer >= trainFromLayer; - // get layer-pattern data for this and the corresponding one from the next layer - std::vector& prevLayerPatternData = layerPatternData.at (idxLayer); - std::vector& currLayerPatternData = layerPatternData.at (idxLayer+1); - size_t numPattern = prevLayerPatternData.size (); - std::vector means 
(_layers.at (idxLayer).numNodes ()); - // ---------------- loop over layerDatas of pattern compute forward ---------------------------- - for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern) - { - const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern); - LayerData& currLayerData = currLayerPatternData.at (idxPattern); + template + void Net::forwardBatch (const LayerContainer& _layers, + LayerPatternContainer& layerPatternData, + std::vector& valuesMean, + std::vector& valuesStdDev, + size_t trainFromLayer) const + { + valuesMean.clear (); + valuesStdDev.clear (); + + // ---------------------------------- loop over layers and pattern ------------------------------------------------------- + size_t cumulativeNodeCount = 0; + for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer) + { + bool doTraining = idxLayer >= trainFromLayer; + + // get layer-pattern data for this and the corresponding one from the next layer + std::vector& prevLayerPatternData = layerPatternData.at (idxLayer); + std::vector& currLayerPatternData = layerPatternData.at (idxLayer+1); + + size_t numPattern = prevLayerPatternData.size (); + size_t numNodesLayer = _layers.at (idxLayer).numNodes (); + + std::vector means (numNodesLayer); + // ---------------- loop over layerDatas of pattern compute forward ---------------------------- + for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern) + { + const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern); + LayerData& currLayerData = currLayerPatternData.at (idxPattern); - if (doTraining) - forward_training (prevLayerData, currLayerData); - else - forward (prevLayerData, currLayerData); + forward (prevLayerData, currLayerData); // feed forward + } + + // ---------------- loop over layerDatas of pattern apply non-linearities ---------------------------- + for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern) + { + //const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern); + LayerData& currLayerData = currLayerPatternData.at (idxPattern); + + if (doTraining) + applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (), + currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ()); + else + applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ()); + } + // accumulate node count + cumulativeNodeCount += numNodesLayer; + } +} - // -------- compute batch mean and variance if batch normalization is turned on ------------------ - if (doBatchNormalization && doTraining) - { -// means.at (idxPattern).add (*(prevLayerData.valuesBegin ()+idxPattern)); - } - } - // ---------------- do batch normalization ---------------------------- - if (doBatchNormalization) - { - if (doTraining) // take means and variances from batch - { - for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern) - { - } - } - else // take average mean and variance for batch normalization - { - } - } - - // ---------------- loop over layerDatas of pattern apply non-linearities ---------------------------- - for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern) - { -// const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern); - LayerData& currLayerData = currLayerPatternData.at (idxPattern); - - if (doTraining) - applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (), - 
currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ()); - else - applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ()); - } - } + template + void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const + { + ModeOutputValues eModeOutput = lastLayerData.outputMode (); + if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput)) + { + outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ()); + } + else if (isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) || + isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput)) + { + const auto& prob = lastLayerData.probabilities (); + outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ()) ; + } + else + assert (false); + } + + + + + template + void Net::fetchOutput (const std::vector& lastLayerPatternData, OutputContainer& outputContainer) const + { + for (const LayerData& lastLayerData : lastLayerPatternData) + fetchOutput (lastLayerData, outputContainer); + } + + + + template + std::tuple Net::computeError (const Settings& settings, + std::vector& lastLayerData, + Batch& batch, + int itWeightBegin, + int itWeightEnd) const + { + typename std::vector::iterator itLayerData = lastLayerData.begin (); + typename std::vector::iterator itLayerDataNext = itLayerData; + ++itLayerDataNext; + typename std::vector::iterator itLayerDataEnd = lastLayerData.end (); + typename std::vector::const_iterator itPattern = batch.begin (); + typename std::vector::const_iterator itPatternEnd = batch.end (); + + double sumWeights (0.0); + double sumError (0.0); - // ------------- fetch output ------------------ - if (fetchOutput) + size_t idxPattern = 0; + for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData, ++itLayerDataNext) + { + ++idxPattern; + + // compute E and the deltas of the computed output and the true output + LayerData& layerData = (*itLayerData); + LayerData& nextLayerData = (*itLayerDataNext); + const Pattern& _pattern = (*itPattern); + double error = errorFunction (layerData, nextLayerData, _pattern.output (), + itWeightBegin, itWeightEnd, + _pattern.weight (), settings.factorWeightDecay (), + settings.regularization ()); + sumWeights += fabs (_pattern.weight ()); + sumError += error; + } + return std::make_tuple (sumError, sumWeights); + } + + + + template + void Net::backPropagate (std::vector>& layerPatternData, std::vector& weightBucket, + const Settings& settings, + size_t trainFromLayer, + size_t totalNumWeights) const + { + bool doTraining = layerPatternData.size () > trainFromLayer; + if (doTraining) // training + { + // ------------- backpropagation ------------- + size_t idxLayer = layerPatternData.size (); + for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend (); + itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData) { - for (LayerData& lastLayerData : layerPatternData.back ()) + --idxLayer; + if (idxLayer <= trainFromLayer) // no training + break; + + std::vector& currLayerDataColl = *(itLayerPatternData); + std::vector& prevLayerDataColl = *(itLayerPatternData+1); + + size_t idxPattern = 0; + for (typename std::vector::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl), + itPrevLayerData = begin (prevLayerDataColl), itPrevLayerDataEnd = end (prevLayerDataColl); + itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, 
++idxPattern) { - ModeOutputValues eModeOutput = lastLayerData.outputMode (); - if (TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, eModeOutput)) - { - outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ()); - } - else if (TMVA::DNN::isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) || - TMVA::DNN::isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput)) - { - const auto& probs = lastLayerData.probabilities (); - outputContainer.insert (outputContainer.end (), probs.begin (), probs.end ()); - } - else - assert (false); + LayerData& currLayerData = (*itCurrLayerData); + LayerData& prevLayerData = *(itPrevLayerData); + + backward (prevLayerData, currLayerData, weightBucket); + + // the factorWeightDecay has to be scaled by 1/n where n is the number of weights (synapses) + // because L1 and L2 regularization + // + // http://neuralnetworksanddeeplearning.com/chap3.html#overfitting_and_regularization + // + // L1 : -factorWeightDecay*sgn(w)/numWeights + // L2 : -factorWeightDecay/numWeights + update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ()); } } + } + } - // ------------- error computation ------------- - std::vector& lastLayerData = layerPatternData.back (); - bool doTraining = layerPatternData.size () > trainFromLayer; +/*! \brief forward propagation and backward propagation + * + * + */ + template + double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch, + int itWeightBegin, int itWeightEnd, + int itGradientBegin, int itGradientEnd, + size_t trainFromLayer, + OutContainer& outputContainer, bool doFetchOutput, std::vector& weightBucket) const + { + Settings& settings = std::get<0>(settingsAndBatch); + Batch& batch = std::get<1>(settingsAndBatch); + DropContainer& dropContainer = std::get<2>(settingsAndBatch); - typename std::vector::iterator itLayerData = lastLayerData.begin (); - typename std::vector::iterator itLayerDataEnd = lastLayerData.end (); + double sumError = 0.0; + double sumWeights = 0.0; // ------------- - typename std::vector::const_iterator itPattern = batch.begin (); - typename std::vector::const_iterator itPatternEnd = batch.end (); - size_t idxPattern = 0; - for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData) - { - ++idxPattern; - - // compute E and the deltas of the computed output and the true output - LayerData& layerData = (*itLayerData); - const Pattern& _pattern = (*itPattern); - itWeight = itWeightBegin; - double error = errorFunction (layerData, _pattern.output (), - itWeight, itWeight + totalNumWeights, - _pattern.weight (), settings.factorWeightDecay (), - settings.regularization ()); - sumWeights += fabs (_pattern.weight ()); - sumError += error; - } + // ----------------------------- prepare layer data ------------------------------------- + size_t totalNumWeights (0); + std::vector> layerPatternData = prepareLayerData (_layers, + batch, + dropContainer, + itWeightBegin, + itWeightEnd, + itGradientBegin, + itGradientEnd, + totalNumWeights); + - if (doTraining) // training + + // ---------------------------------- propagate forward ------------------------------------------------------------------ + std::vector valuesMean; + std::vector valuesStdDev; + forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer); + + + // ------------- fetch output ------------------ + if (doFetchOutput) { + fetchOutput (layerPatternData.back (), outputContainer); + } + + + // ------------- error computation 
------------- + std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights); + + // ------------- backpropagation ------------- - size_t idxLayer = layerPatternData.size (); - for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend (); - itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData) - { - --idxLayer; - if (idxLayer <= trainFromLayer) // no training - break; + backPropagate (layerPatternData, weightBucket, settings, trainFromLayer, totalNumWeights); - std::vector& currLayerDataColl = *(itLayerPatternData); - std::vector& prevLayerDataColl = *(itLayerPatternData+1); - idxPattern = 0; - for (typename std::vector::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl), - itPrevLayerData = begin (prevLayerDataColl), itPrevLayerDataEnd = end (prevLayerDataColl); - itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern) - { - LayerData& currLayerData = (*itCurrLayerData); - LayerData& prevLayerData = *(itPrevLayerData); - - backward (prevLayerData, currLayerData); - - // the factorWeightDecay has to be scaled by 1/n where n is the number of weights (synapses) - // because L1 and L2 regularization - // - // http://neuralnetworksanddeeplearning.com/chap3.html#overfitting_and_regularization - // - // L1 : -factorWeightDecay*sgn(w)/numWeights - // L2 : -factorWeightDecay/numWeights - update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ()); - } - } - } - + // --- compile the measures double batchSize = std::distance (std::begin (batch), std::end (batch)); for (auto it = itGradientBegin; it != itGradientEnd; ++it) (*it) /= batchSize; @@ -1427,8 +1474,7 @@ namespace TMVA if (eInitStrategy == WeightInitializationStrategy::XAVIER) { // input and output properties - //int numInput = inputSize (); - int numInput = BUCKET_SIZE; + int numInput = inputSize (); // compute variance and mean of input and output //... 
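As a point of reference for the initialization hunks that follow (each loop now fills only layer.numWeights(numInput) / BUCKET_SIZE entries per layer), here is a minimal sketch of how a caller could size and fill the compressed weight container. The helper name countBucketWeights is hypothetical; BUCKET_SIZE, numWeights(), numNodes(), layers(), inputSize() and initializeWeights() are taken from the surrounding code.

   // hypothetical helper (illustration only): total number of compressed weights the
   // net keeps when every layer stores numWeights(numInput)/BUCKET_SIZE shared entries
   template <typename LayerContainer>
   size_t countBucketWeights (const LayerContainer& _layers, size_t numInput)
   {
      size_t total = 0;
      for (const auto& layer : _layers)
      {
         total += layer.numWeights (numInput) / BUCKET_SIZE; // compressed count for this layer
         numInput = layer.numNodes ();                       // fan-in of the next layer
      }
      return total;
   }

   // usage sketch (assumed calling code, matching the calls used elsewhere in this patch):
   //    std::vector<double> weightBucket;
   //    weightBucket.reserve (countBucketWeights (net.layers (), net.inputSize ()));
   //    net.initializeWeights (DNN::WeightInitializationStrategy::XAVIER,
   //                           std::back_inserter (weightBucket));
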
@@ -1439,7 +1485,7 @@ namespace TMVA { double nIn = numInput; double stdDev = sqrt (2.0/nIn); - for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight) + for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) { (*itWeight) = DNN::gaussDouble (0.0, stdDev); // factor 2.0 for ReLU ++itWeight; @@ -1464,7 +1510,7 @@ namespace TMVA double nIn = numInput; double minVal = -sqrt(2.0/nIn); double maxVal = sqrt (2.0/nIn); - for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight) + for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) { (*itWeight) = DNN::uniformDouble (minVal, maxVal); // factor 2.0 for ReLU @@ -1488,7 +1534,7 @@ namespace TMVA for (auto& layer: layers ()) { // double nIn = numInput; - for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight) + for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) { (*itWeight) = DNN::gaussDouble (0.0, 0.1); ++itWeight; @@ -1511,7 +1557,7 @@ namespace TMVA for (auto& layer: layers ()) { double nIn = numInput; - for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight) + for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) { (*itWeight) = DNN::gaussDouble (0.0, sqrt (layer.numWeights (nIn))); // factor 2.0 for ReLU ++itWeight; @@ -1533,9 +1579,10 @@ namespace TMVA */ template double Net::errorFunction (LayerData& layerData, + LayerData& nextLayerData, Container truth, - ItWeight itWeight, - ItWeight itWeightEnd, + int itWeight, + int itWeightEnd, double patternWeight, double factorWeightDecay, EnumRegularization eRegularization) const @@ -1546,7 +1593,7 @@ namespace TMVA case ModeErrorFunction::SUMOFSQUARES: { error = sumOfSquares (layerData.valuesBegin (), layerData.valuesEnd (), begin (truth), end (truth), - layerData.deltasBegin (), layerData.deltasEnd (), + layerData.deltasBegin (), layerData.deltasEnd (), layerData.getDeltaBucket(), layerData.inverseActivationFunction (), patternWeight); break; @@ -1557,7 +1604,7 @@ namespace TMVA std::vector probabilities = layerData.probabilities (); error = crossEntropy (begin (probabilities), end (probabilities), begin (truth), end (truth), - layerData.deltasBegin (), layerData.deltasEnd (), + layerData.deltasBegin (), layerData.deltasEnd (), layerData.getDeltaBucket(), layerData.inverseActivationFunction (), patternWeight); break; @@ -1568,7 +1615,7 @@ namespace TMVA std::vector probabilities = layerData.probabilities (); error = softMaxCrossEntropy (begin (probabilities), end (probabilities), begin (truth), end (truth), - layerData.deltasBegin (), layerData.deltasEnd (), + layerData.deltasBegin (), layerData.deltasEnd (), layerData.getDeltaBucket(), layerData.inverseActivationFunction (), patternWeight); break; @@ -1576,7 +1623,7 @@ namespace TMVA } if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE) { - error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization); + error = weightDecay (error, layerData.weightsBegin (), nextLayerData.weightsBegin (), factorWeightDecay, eRegularization); } return error; } @@ -1587,115 +1634,141 @@ namespace TMVA +// /*! 
\brief pre-training +// * +// * in development +// */ +// template +// void Net::preTrain (std::vector& weights, +// std::vector& trainPattern, +// const std::vector& testPattern, +// Minimizer& minimizer, Settings& settings) +// { +// auto itWeightGeneral = std::begin (weights); +// std::vector prePatternTrain (trainPattern.size ()); +// std::vector prePatternTest (testPattern.size ()); + +// size_t _inputSize = inputSize (); + +// // transform pattern using the created preNet +// auto initializePrePattern = [&](const std::vector& pttrnInput, std::vector& pttrnOutput) +// { +// pttrnOutput.clear (); +// std::transform (std::begin (pttrnInput), std::end (pttrnInput), +// std::back_inserter (pttrnOutput), +// [](const Pattern& p) +// { +// Pattern pat (p.input (), p.input (), p.weight ()); +// return pat; +// }); +// }; + +// initializePrePattern (trainPattern, prePatternTrain); +// initializePrePattern (testPattern, prePatternTest); + +// std::vector originalDropFractions = settings.dropFractions (); + +// for (auto& _layer : layers ()) +// { +// // compute number of weights (as a function of the number of incoming nodes) +// // fetch number of nodes +// size_t numNodes = _layer.numNodes (); +// size_t _numWeights = _layer.numWeights (_inputSize); + +// // ------------------ +// DNN::Net preNet; +// if (!originalDropFractions.empty ()) +// { +// originalDropFractions.erase (originalDropFractions.begin ()); +// settings.setDropOut (originalDropFractions.begin (), originalDropFractions.end (), settings.dropRepetitions ()); +// } +// std::vector preWeights; + +// // define the preNet (pretraining-net) for this layer +// // outputSize == inputSize, because this is an autoencoder; +// preNet.setInputSize (_inputSize); +// preNet.addLayer (DNN::Layer (numNodes, _layer.activationFunctionType ())); +// preNet.addLayer (DNN::Layer (_inputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::DIRECT)); +// preNet.setErrorFunction (DNN::ModeErrorFunction::SUMOFSQUARES); +// preNet.setOutputSize (_inputSize); // outputSize is the inputSize (autoencoder) + +// // initialize weights +// preNet.initializeWeights (DNN::WeightInitializationStrategy::XAVIERUNIFORM, +// std::back_inserter (preWeights)); + +// // overwrite already existing weights from the "general" weights +// std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()); +// std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()+_numWeights); // set identical weights for the temporary output layer + -/*! 
\brief pre-training - * - * in development - */ - template - void Net::preTrain (std::vector& weights, - std::vector& trainPattern, - const std::vector& testPattern, - Minimizer& minimizer, Settings& settings) - { - auto itWeightGeneral = std::begin (weights); - std::vector prePatternTrain (trainPattern.size ()); - std::vector prePatternTest (testPattern.size ()); +// // train the "preNet" +// preNet.train (preWeights, prePatternTrain, prePatternTest, minimizer, settings); - size_t _inputSize = inputSize (); +// // fetch the pre-trained weights (without the output part of the autoencoder) +// std::copy (std::begin (preWeights), std::begin (preWeights) + _numWeights, itWeightGeneral); - // transform pattern using the created preNet - auto initializePrePattern = [&](const std::vector& pttrnInput, std::vector& pttrnOutput) - { - pttrnOutput.clear (); - std::transform (std::begin (pttrnInput), std::end (pttrnInput), - std::back_inserter (pttrnOutput), - [](const Pattern& p) - { - Pattern pat (p.input (), p.input (), p.weight ()); - return pat; - }); - }; +// // advance the iterator on the incoming weights +// itWeightGeneral += _numWeights; - initializePrePattern (trainPattern, prePatternTrain); - initializePrePattern (testPattern, prePatternTest); +// // remove the weights of the output layer of the preNet +// preWeights.erase (preWeights.begin () + _numWeights, preWeights.end ()); - std::vector originalDropFractions = settings.dropFractions (); +// // remove the outputLayer of the preNet +// preNet.removeLayer (); - for (auto& _layer : layers ()) - { - // compute number of weights (as a function of the number of incoming nodes) - // fetch number of nodes - size_t numNodes = _layer.numNodes (); - size_t _numWeights = _layer.numWeights (_inputSize); - - // ------------------ - DNN::Net preNet; - if (!originalDropFractions.empty ()) - { - originalDropFractions.erase (originalDropFractions.begin ()); - settings.setDropOut (originalDropFractions.begin (), originalDropFractions.end (), settings.dropRepetitions ()); - } - std::vector preWeights; - - // define the preNet (pretraining-net) for this layer - // outputSize == inputSize, because this is an autoencoder; - preNet.setInputSize (_inputSize); - preNet.addLayer (DNN::Layer (numNodes, _layer.activationFunctionType ())); - preNet.addLayer (DNN::Layer (_inputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::DIRECT)); - preNet.setErrorFunction (DNN::ModeErrorFunction::SUMOFSQUARES); - preNet.setOutputSize (_inputSize); // outputSize is the inputSize (autoencoder) - - // initialize weights - preNet.initializeWeights (DNN::WeightInitializationStrategy::XAVIERUNIFORM, - std::back_inserter (preWeights)); - - // overwrite already existing weights from the "general" weights - std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()); - std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()+_numWeights); // set identical weights for the temporary output layer +// // set the output size to the number of nodes in the new output layer (== last hidden layer) +// preNet.setOutputSize (numNodes); +// // transform pattern using the created preNet +// auto proceedPattern = [&](std::vector& pttrn) +// { +// std::vector newPttrn; +// std::for_each (std::begin (pttrn), std::end (pttrn), +// [&preNet,&preWeights,&newPttrn](Pattern& p) +// { +// std::vector output = preNet.compute (p.input (), preWeights); +// Pattern pat (output, output, p.weight ()); +// newPttrn.push_back (pat); +// // p = pat; +// }); +// return 
newPttrn; +// }; - // train the "preNet" - preNet.train (preWeights, prePatternTrain, prePatternTest, minimizer, settings); - // fetch the pre-trained weights (without the output part of the autoencoder) - std::copy (std::begin (preWeights), std::begin (preWeights) + _numWeights, itWeightGeneral); +// prePatternTrain = proceedPattern (prePatternTrain); +// prePatternTest = proceedPattern (prePatternTest); - // advance the iterator on the incoming weights - itWeightGeneral += _numWeights; - // remove the weights of the output layer of the preNet - preWeights.erase (preWeights.begin () + _numWeights, preWeights.end ()); +// // the new input size is the output size of the already reduced preNet +// _inputSize = preNet.layers ().back ().numNodes (); +// } +// } - // remove the outputLayer of the preNet - preNet.removeLayer (); - // set the output size to the number of nodes in the new output layer (== last hidden layer) - preNet.setOutputSize (numNodes); - - // transform pattern using the created preNet - auto proceedPattern = [&](std::vector& pttrn) - { - std::vector newPttrn; - std::for_each (std::begin (pttrn), std::end (pttrn), - [&preNet,&preWeights,&newPttrn](Pattern& p) - { - std::vector output = preNet.compute (p.input (), preWeights); - Pattern pat (output, output, p.weight ()); - newPttrn.push_back (pat); -// p = pat; - }); - return newPttrn; - }; - - prePatternTrain = proceedPattern (prePatternTrain); - prePatternTest = proceedPattern (prePatternTest); +/*! \brief initialization of the deltas and gradients + * + * + */ + template + void Net::initializeGradientsDeltas (OutIterator itGradient, OutIterator itDelta) + { + // input and output properties + int numInput = inputSize (); - // the new input size is the output size of the already reduced preNet - _inputSize = preNet.layers ().back ().numNodes (); + // initialize the deltas and gradients + for (auto& layer: layers ()) + { + for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) + { + (*itGradient) = 0.0; + (*itDelta) = 0.0; + ++itGradient; itDelta++; + } + numInput = layer.numNodes (); } + return; } @@ -1708,11 +1781,6 @@ namespace TMVA - - - - - } // namespace DNN } // namespace TMVA From c696958ff92b632c2ec5a9c14fd99b2da82c2bca Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Tue, 23 Aug 2016 19:21:21 +0530 Subject: [PATCH 16/42] Updated with complete HashedNets --- tmva/tmva/inc/TMVA/NeuralNet.h | 322 ++++++++++++++++++++------------- 1 file changed, 195 insertions(+), 127 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index 0660c6b7911cd..00bef4763eb26 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -52,7 +52,6 @@ #include // turn on or off exceptions for NaN and other numeric exceptions -#include "xxhash.h" namespace TMVA { @@ -62,7 +61,7 @@ namespace TMVA // double gaussDoubl (edouble mean, double sigma); - + int BUCKET_SIZE = 8; // ------------------------------- Declare Bucket Size -------------------------------------------- double gaussDouble (double mean, double sigma); double uniformDouble (double minValue, double maxValue); @@ -75,63 +74,80 @@ namespace TMVA { public: MeanVariance() - : m_n(0) - , m_sumWeights(0) - , m_sumWeightsSquared(0) - , m_mean(0) - , m_squared(0) - {} - - inline void clear() - { - m_n = 0; - m_sumWeights = 0; - m_sumWeightsSquared = 0; - } - - template - inline void add(T value, double weight = 1.0) - { - m_n++; // a value has been added - - double 
dValue = (double)value; - if (m_n == 1) // initialization - { - m_mean = dValue; - m_squared = 0.0; - m_sumWeightsSquared = weight*weight; - m_sumWeights = weight; - return; - } - - double tmpWeight = m_sumWeights+weight; - double diff = dValue - m_mean; - - double tmp = diff*weight/tmpWeight; - m_mean = m_mean + tmp; - m_squared = m_squared + tmpWeight*diff*tmp; - - m_sumWeights = tmpWeight; - m_sumWeightsSquared += weight*weight; - } - - - - inline int count() const { return m_n; } - inline double weights() const { if(m_n==0) return 0; return m_sumWeights; } - inline double mean() const { if(m_n==0) return 0; return m_mean; } - inline double var_N() const { if(m_n==0) return 0; return (m_squared/m_sumWeights); } - // inline double var () const { return (Variance_N()*m_n/(m_n-1)); } // unbiased for small sample sizes - inline double var () const { if(m_n==0) return 0; if(m_squared<=0) return 0.0; return (m_squared*m_sumWeights/(m_sumWeights*m_sumWeights-m_sumWeightsSquared)); } // unbiased for small sample sizes - inline double stdDev_N () const { return sqrt( var_N() ); } - inline double stdDev () const { return sqrt( var() ); } // unbiased for small sample sizes + : m_n(0) + , m_sumWeights(0) + , m_mean(0) + , m_squared(0) + {} + + inline void clear() + { + m_n = 0; + m_sumWeights = 0; + m_mean = 0; + m_squared = 0; + } + + template + inline void add(T value, double weight = 1.0) + { + ++m_n; // a value has been added + + if (m_n == 1) // initialization + { + m_mean = value; + m_squared = 0.0; + m_sumWeights = weight; + return; + } + + double tmpWeight = m_sumWeights+weight; + double Q = value - m_mean; + + double R = Q*weight/tmpWeight; + m_mean += R; + m_squared += m_sumWeights*R*Q; + + m_sumWeights = tmpWeight; + } + + template + inline void add (ITERATOR itBegin, ITERATOR itEnd) + { + for (ITERATOR it = itBegin; it != itEnd; ++it) + add (*it); + } + + + + inline int count() const { return m_n; } + inline double weights() const { if(m_n==0) return 0; return m_sumWeights; } + inline double mean() const { if(m_n==0) return 0; return m_mean; } + inline double var() const + { + if(m_n==0) + return 0; + if (m_squared <= 0) + return 0; + return (m_squared/m_sumWeights); + } + + inline double var_corr () const + { + if (m_n <= 1) + return var (); + + return (var()*m_n/(m_n-1)); // unbiased for small sample sizes + } + + inline double stdDev_corr () const { return sqrt( var_corr() ); } + inline double stdDev () const { return sqrt( var() ); } // unbiased for small sample sizes private: - size_t m_n; - double m_sumWeights; - double m_sumWeightsSquared; - double m_mean; - double m_squared; + size_t m_n; + double m_sumWeights; + double m_mean; + double m_squared; }; @@ -240,12 +256,12 @@ namespace TMVA template - void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, int weightIndex, std::vector& weightBucket, ItTarget itTargetBegin, ItTarget itTargetEnd); + void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, int itWeight, std::vector& weightBucket, ItTarget itTargetBegin, ItTarget itTargetEnd); template - void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int weightIndex, std::vector& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd); + void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd); @@ -256,24 +272,24 @@ namespace TMVA template - void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction, ItInverseFunction itInverseFunction, 
ItGradient itGradient); + void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction, ItInverseFunction itInverseFunction, int itGradient, std::vector& gradientBucket); template void update (ItSource itSource, ItSource itSourceEnd, - ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, - ItTargetGradient itTargetGradientBegin, - ItGradient itGradient); + int itTargetDeltaBegin, int itTargetDeltaEnd, std::vector& deltaBucket, + int itTargetGradientBegin, + int itGradient, std::vector& gradientBucket); template void update (ItSource itSource, ItSource itSourceEnd, - ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, - ItTargetGradient itTargetGradientBegin, - ItGradient itGradient, - int weightIndex, std::vector& weightBucket, double weightDecay); + int itTargetDeltaBegin, int itTargetDeltaEnd, std::vector& deltaBucket, + int itTargetGradientBegin, + int itGradient, std::vector& gradientBucket, + int itWeight, std::vector& weightBucket, double weightDecay); @@ -348,12 +364,12 @@ namespace TMVA * called */ template - double operator() (Function& fitnessFunction, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, PassThrough& passThrough); + double operator() (Function& fitnessFunction, std::vector& weightBucket, std::vector& gradientBucket, PassThrough& passThrough); double m_alpha; ///< internal parameter (learningRate) double m_beta; ///< internal parameter (momentum) - std::vector m_prevGradients; ///< vector remembers the gradients of the previous step + std::vector m_prevGradientBucket; ///< vector remembers the gradients of the previous step }; @@ -374,18 +390,18 @@ namespace TMVA template - double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight); + double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, InvFnc invFnc, double patternWeight); template - double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight); + double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, ItInvActFnc /*itInvActFnc*/, double patternWeight); template - double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight); + double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, ItInvActFnc /*itInvActFnc*/, double patternWeight); @@ -468,8 +484,8 @@ namespace TMVA * output values (mutually exclusive probability) */ LayerData (size_t size, - const_iterator_type itWeightBegin, - iterator_type itGradientBegin, + int itWeightBegin, + int itGradientBegin, std::shared_ptr> activationFunction, std::shared_ptr> inverseActivationFunction, ModeOutputValues eModeOutput = ModeOutputValues::DIRECT); @@ -487,7 +503,7 @@ namespace TMVA * output value (to create a probability); SOFTMAX applies a softmax transformation to all * output values (mutually exclusive probability) */ - 
LayerData (size_t size, const_iterator_type itWeightBegin, + LayerData (size_t size, int itWeightBegin, std::shared_ptr> activationFunction, ModeOutputValues eModeOutput = ModeOutputValues::DIRECT); @@ -499,10 +515,11 @@ namespace TMVA : m_size (other.m_size) , m_itInputBegin (other.m_itInputBegin) , m_itInputEnd (other.m_itInputEnd) - , m_deltas (other.m_deltas) - , m_valueGradients (other.m_valueGradients) + , m_deltaBucket (other.m_deltaBucket) + , m_valueGradientBucket (other.m_valueGradientBucket) , m_values (other.m_values) - , m_hasDropOut (false) + , m_itDropOut (other.m_itDropOut) + , m_hasDropOut (other.m_hasDropOut) , m_itConstWeightBegin (other.m_itConstWeightBegin) , m_itGradientBegin (other.m_itGradientBegin) , m_activationFunction (other.m_activationFunction) @@ -521,14 +538,15 @@ namespace TMVA : m_size (other.m_size) , m_itInputBegin (other.m_itInputBegin) , m_itInputEnd (other.m_itInputEnd) - , m_deltas (other.m_deltas) - , m_valueGradients (other.m_valueGradients) - , m_values (other.m_values) - , m_hasDropOut (false) + , m_deltaBucket (std::move(other.m_deltaBucket)) + , m_valueGradientBucket (std::move(other.m_valueGradientBucket)) + , m_values (std::move(other.m_values)) + , m_itDropOut (other.m_itDropOut) + , m_hasDropOut (other.m_hasDropOut) , m_itConstWeightBegin (other.m_itConstWeightBegin) , m_itGradientBegin (other.m_itGradientBegin) - , m_activationFunction (other.m_activationFunction) - , m_inverseActivationFunction (other.m_inverseActivationFunction) + , m_activationFunction (std::move(other.m_activationFunction)) + , m_inverseActivationFunction (std::move(other.m_inverseActivationFunction)) , m_isInputLayer (other.m_isInputLayer) , m_hasWeights (other.m_hasWeights) , m_hasGradients (other.m_hasGradients) @@ -557,7 +575,7 @@ namespace TMVA void clear () { m_values.assign (m_values.size (), 0.0); - m_deltas.assign (m_deltas.size (), 0.0); + m_deltaBucket.assign (m_deltaBucket.size (), 0.0); } const_iterator_type valuesBegin () const { return m_isInputLayer ? 
m_itInputBegin : begin (m_values); } ///< returns const iterator to the begin of the (node) values @@ -567,23 +585,26 @@ namespace TMVA iterator_type valuesEnd () { assert (!m_isInputLayer); return end (m_values); } ///< returns iterator to the end of the (node) values ModeOutputValues outputMode () const { return m_eModeOutput; } ///< returns the output mode - container_type probabilities () { return computeProbabilities (); } ///< computes the probabilities from the current node values and returns them + container_type probabilities () const { return computeProbabilities (); } ///< computes the probabilities from the current node values and returns them - iterator_type deltasBegin () { return begin (m_deltas); } ///< returns iterator to the begin of the deltas (back-propagation) - iterator_type deltasEnd () { return end (m_deltas); } ///< returns iterator to the end of the deltas (back-propagation) + int deltasBegin () { return m_itGradientBegin; } ///< returns iterator to the begin of the deltas (back-propagation) + int deltasEnd () { return (m_deltaBucket.size() * BUCKET_SIZE + m_itGradientBegin); } ///< returns iterator to the end of the deltas (back-propagation) - const_iterator_type deltasBegin () const { return begin (m_deltas); } ///< returns const iterator to the begin of the deltas (back-propagation) - const_iterator_type deltasEnd () const { return end (m_deltas); } ///< returns const iterator to the end of the deltas (back-propagation) + int deltasBegin () const { return m_itGradientBegin; } ///< returns const int to the begin of the deltas (back-propagation) + int deltasEnd () const { return (m_deltaBucket.size() * BUCKET_SIZE + m_itGradientBegin); } ///< returns const int to the end of the deltas (back-propagation) - iterator_type valueGradientsBegin () { return begin (m_valueGradients); } ///< returns iterator to the begin of the gradients of the node values - iterator_type valueGradientsEnd () { return end (m_valueGradients); } ///< returns iterator to the end of the gradients of the node values + int valueGradientsBegin () { return m_itGradientBegin; } ///< returns int to the begin of the gradients of the node values + int valueGradientsEnd () { return (m_valueGradientBucket.size() * BUCKET_SIZE + m_itGradientBegin); } ///< returns int to the end of the gradients of the node values - const_iterator_type valueGradientsBegin () const { return begin (m_valueGradients); } ///< returns const iterator to the begin of the gradients - const_iterator_type valueGradientsEnd () const { return end (m_valueGradients); } ///< returns const iterator to the end of the gradients + int valueGradientsBegin () const { return m_itGradientBegin; } ///< returns const int to the begin of the gradients + int valueGradientsEnd () const { return (m_valueGradientBucket.size() * BUCKET_SIZE + m_itGradientBegin); } ///< returns const int to the end of the gradients - iterator_type gradientsBegin () { assert (m_hasGradients); return m_itGradientBegin; } ///< returns iterator to the begin of the gradients - const_iterator_type gradientsBegin () const { assert (m_hasGradients); return m_itGradientBegin; } ///< returns const iterator to the begin of the gradients - const_iterator_type weightsBegin () const { assert (m_hasWeights); return m_itConstWeightBegin; } ///< returns const iterator to the begin of the weights for this layer + int gradientsBegin () { assert (m_hasGradients); return m_itGradientBegin; } ///< returns iterator to the begin of the gradients + int gradientsBegin () const { assert 
(m_hasGradients); return m_itGradientBegin; } ///< returns const iterator to the begin of the gradients + int weightsBegin () const { assert (m_hasWeights); return m_itConstWeightBegin; } ///< returns const iterator to the begin of the weights for this layer + + std::vector getGradientBucket () { return m_valueGradientBucket; } // returns gradient bucket. + std::vector getDeltaBucket () { return m_deltaBucket; } // returns gradient bucket. std::shared_ptr> activationFunction () const { return m_activationFunction; } std::shared_ptr> inverseActivationFunction () const { return m_inverseActivationFunction; } @@ -601,7 +622,7 @@ namespace TMVA void clearDropOut () { m_hasDropOut = false; } bool hasDropOut () const { return m_hasDropOut; } ///< has this layer drop-out turned on? - const_dropout_iterator dropOut () const { return m_itDropOut; } ///< return the begin of the drop-out information + const_dropout_iterator dropOut () const { assert (m_hasDropOut); return m_itDropOut; } ///< return the begin of the drop-out information size_t size () const { return m_size; } ///< return the size of the layer @@ -611,7 +632,7 @@ namespace TMVA * * */ - container_type computeProbabilities (); + container_type computeProbabilities () const; private: @@ -620,14 +641,17 @@ namespace TMVA const_iterator_type m_itInputBegin; ///< iterator to the first of the nodes in the input node vector const_iterator_type m_itInputEnd; ///< iterator to the end of the nodes in the input node vector - std::vector m_deltas; ///< stores the deltas for the DNN training - std::vector m_valueGradients; ///< stores the gradients of the values (nodes) + std::vector m_deltaBucket; ///< stores the deltas for the DNN training + std::vector m_valueGradientBucket; ///< stores the gradients of the values (nodes) + + Net::initializeGradientsDeltas(std::back_inserter (m_valueGradientBucket), std::back_inserter (m_deltaBucket)); // initialize delta and gradient buckets. + std::vector m_values; ///< stores the values of the nodes in this layer const_dropout_iterator m_itDropOut; ///< iterator to a container indicating if the corresponding node is to be dropped bool m_hasDropOut; ///< dropOut is turned on? 
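      // Illustrative sketch only, not part of this patch: with the HashedNets-style weight
      // sharing used here, an integer offset (as now returned by deltasBegin(),
      // valueGradientsBegin() or gradientsBegin()) is resolved into a shared slot of a
      // compressed bucket. The helper name bucketLookup and the seed argument are
      // assumptions; the one-shot XXH32(const void*, size_t, unsigned) entry point of the
      // xxHash library is assumed to be available.
      static double bucketLookup (const std::vector<double>& bucket, int virtualIndex, unsigned int seed)
      {
         unsigned int h = XXH32 (&virtualIndex, sizeof (virtualIndex), seed); // hash the virtual index
         return bucket[h % bucket.size ()];                                   // shared slot inside the bucket
      }
      // e.g. reading one delta through the compressed container:
      //    double d = bucketLookup (m_deltaBucket, deltasBegin () + i, seed);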
- const_iterator_type m_itConstWeightBegin; ///< const iterator to the first weight of this layer in the weight vector - iterator_type m_itGradientBegin; ///< const iterator to the first gradient of this layer in the gradient vector + int m_itConstWeightBegin; ///< const iterator to the first weight of this layer in the weight vector + int m_itGradientBegin; ///< iterator to the first gradient of this layer in the gradient vector std::shared_ptr> m_activationFunction; ///< activation function for this layer std::shared_ptr> m_inverseActivationFunction; ///< inverse activation function for this layer @@ -694,14 +718,11 @@ namespace TMVA template - void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData); - - template - void forward_training (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData); + void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket); template - void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData); + void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket); template @@ -727,8 +748,7 @@ namespace TMVA MinimizerType _eMinimizerType = MinimizerType::fSteepest, double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3, - bool _multithreading = true, - bool _doBatchNormalization = true); + bool _multithreading = true); /*! \brief d'tor * @@ -801,7 +821,6 @@ namespace TMVA EnumRegularization regularization () const { return m_regularization; } ///< some regularization of the DNN is turned on? bool useMultithreading () const { return m_useMultithreading; } ///< is multithreading turned on? - bool doBatchNormalization () const { return m_doBatchNormalization; } void pads (int numPads) { if (fMonitoring) fMonitoring->pads (numPads); } ///< preparation for monitoring @@ -850,7 +869,6 @@ namespace TMVA protected: bool m_useMultithreading; - bool m_doBatchNormalization; std::shared_ptr fMonitoring; }; @@ -893,10 +911,9 @@ namespace TMVA double _factorWeightDecay = 1e-5, EnumRegularization _regularization = EnumRegularization::NONE, size_t _scaleToNumEvents = 0, MinimizerType _eMinimizerType = MinimizerType::fSteepest, double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3, - bool _useMultithreading = true, - bool _useBatchNormalization = true) + bool _useMultithreading = true) : Settings (name, _convergenceSteps, _batchSize, _testRepetitions, _factorWeightDecay, - _regularization, _eMinimizerType, _learningRate, _momentum, _repetitions, _useMultithreading, _useBatchNormalization) + _regularization, _eMinimizerType, _learningRate, _momentum, _repetitions, _useMultithreading) , m_ams () , m_sumOfSigWeights (0) , m_sumOfBkgWeights (0) @@ -1094,7 +1111,7 @@ namespace TMVA * */ template - void dropOutWeightFactor (int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucke, + void dropOutWeightFactor (std::vector& weightBucket, const DropProbabilities& drops, bool inverse = false); @@ -1107,10 +1124,11 @@ namespace TMVA * \param settings settings used for this training run */ template - double train (std::vector& weights, + double train (std::vector& weightBucket, std::vector& trainPattern, const std::vector& testPattern, - Minimizer& minimizer, Settings& settings); + Minimizer& minimizer, + Settings& settings); /*! 
\brief pre-training for future use * @@ -1133,13 +1151,16 @@ namespace TMVA * \param dropContainer the configuration for DNN drop-out */ template - inline double trainCycle (Minimizer& minimizer, std::vector& weights, - Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer); + inline double trainCycle (Minimizer& minimizer, std::vector& weightBucket, + Iterator itPatternBegin, Iterator itPatternEnd, + Settings& settings, + DropContainer& dropContainer); size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net + size_t numNodes (size_t trainingStartLayer = 0) const; ///< returns the number of nodes in this net template - std::vector compute (const std::vector& input, const Weights& weights) const; ///< compute the net with the given input and the given weights + std::vector compute (const std::vector& input, const std::vector& weightBucket) const; ///< compute the net with the given input and the given weights template double operator() (PassThrough& settingsAndBatch, const Weights& weights) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients @@ -1148,24 +1169,66 @@ namespace TMVA double operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput eFetch, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function template - double operator() (PassThrough& settingsAndBatch, const Weights& weights, Gradients& gradients) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well + double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well template - double operator() (PassThrough& settingsAndBatch, const Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const; + double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const; + + + template + std::vector> prepareLayerData (LayerContainer& layers, + Batch& batch, + const DropContainer& dropContainer, + int itWeightBegin, + int itWeightEnd, + int itGradientBegin, + int itGradientEnd, + size_t& totalNumWeights) const; + + template + void forwardPattern (const LayerContainer& _layers, + std::vector& layerData, std::vector& weightBucket) const; + + + template + void forwardBatch (const LayerContainer& _layers, + LayerPatternContainer& layerPatternData, + std::vector& valuesMean, + std::vector& valuesStdDev, + size_t trainFromLayer) const; + + template + void fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const; + + template + void fetchOutput (const std::vector& layerPatternData, OutputContainer& outputContainer) const; + + template + std::tuple computeError (const Settings& settings, + std::vector& lastLayerData, + Batch& batch, + int itWeightBegin, + int itWeightEnd) const; + template + void backPropagate (std::vector>& layerPatternData, std::vector& weightBucket, + const Settings& settings, + size_t trainFromLayer, + size_t totalNumWeights) const; + - /*! \brief main DNN computation function + /*! 
\brief main NN computation function * * */ template double forward_backward (LayerContainer& layers, PassThrough& settingsAndBatch, - ItWeight itWeightBegin, - ItGradient itGradientBegin, ItGradient itGradientEnd, + int itWeightBegin, int itWeightEnd, + int itGradientBegin, int itGradientEnd, size_t trainFromLayer, - OutContainer& outputContainer, bool fetchOutput) const; + OutContainer& outputContainer, bool fetchOutput, std::vector& weightBucket) const; @@ -1179,6 +1242,7 @@ namespace TMVA */ template double errorFunction (LayerData& layerData, + LayerData& nextLayerData, Container truth, ItWeight itWeight, ItWeight itWeightEnd, @@ -1204,6 +1268,9 @@ namespace TMVA void initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight); ///< initialize the weights with the given strategy + template + void initializeGradientsDeltas (OutIterator itGradient, OutIterator itDelta); ///< initialize the weights with the given strategy + protected: void fillDropContainer (DropContainer& dropContainer, double dropFraction, size_t numNodes) const; ///< prepare the drop-out-container (select the nodes which are to be dropped out) @@ -1220,6 +1287,7 @@ namespace TMVA +typedef std::tuple pass_through_type; From 8d6b2d46bfc17b03df65890138bfce452282823e Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Tue, 23 Aug 2016 19:22:29 +0530 Subject: [PATCH 17/42] Updated with Complete HashedNets --- tmva/tmva/inc/TMVA/MethodDNN.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tmva/tmva/inc/TMVA/MethodDNN.h b/tmva/tmva/inc/TMVA/MethodDNN.h index 31a3f11d27775..d97ec6c60e42e 100644 --- a/tmva/tmva/inc/TMVA/MethodDNN.h +++ b/tmva/tmva/inc/TMVA/MethodDNN.h @@ -70,12 +70,10 @@ namespace TMVA { MethodDNN ( const TString& jobName, const TString& methodTitle, DataSetInfo& theData, - const TString& theOption, - TDirectory* theTargetDir = 0 ); + const TString& theOption); MethodDNN ( DataSetInfo& theData, - const TString& theWeightFile, - TDirectory* theTargetDir = 0 ); + const TString& theWeightFile ); virtual ~MethodDNN(); @@ -131,7 +129,7 @@ namespace TMVA { private: TMVA::DNN::Net fNet; - std::vector fWeights; + std::vector fWeightBucket; TString fLayoutString; std::vector> fLayout; From e277c733d65d5d8cf57316f3e09ef9a8abb39c37 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Tue, 23 Aug 2016 19:24:46 +0530 Subject: [PATCH 18/42] Update MethodDNN.h --- tmva/tmva/inc/TMVA/MethodDNN.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tmva/tmva/inc/TMVA/MethodDNN.h b/tmva/tmva/inc/TMVA/MethodDNN.h index d97ec6c60e42e..1655e713bb21f 100644 --- a/tmva/tmva/inc/TMVA/MethodDNN.h +++ b/tmva/tmva/inc/TMVA/MethodDNN.h @@ -1,5 +1,5 @@ // @(#)root/tmva $Id$ -// Author: Peter Speckmayer +// Authors: Peter Speckmayer, Aditya Sharma /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * @@ -10,7 +10,8 @@ * Description: * * NeuralNetwork * * * - * Authors (alphabetical): * + * Authors (alphabetical): + * Aditya Sharma - CERN, Switzerland * Peter Speckmayer - CERN, Switzerland * * * * Copyright (c) 2005-2015: * From a820db640d956ca5265ff9ea635c08b736f25df2 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Tue, 23 Aug 2016 19:25:42 +0530 Subject: [PATCH 19/42] Update NeuralNet.h --- tmva/tmva/inc/TMVA/NeuralNet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index 
00bef4763eb26..82b5cfb23fe72 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -1,6 +1,6 @@ /** * @file NeuralNet - * @author Peter Speckmayer + * @author Peter Speckmayer, Aditya Sharma * @version 1.0 * * @section LICENSE From 919a319ff794101b717e046e5207aefc790c1131 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Tue, 23 Aug 2016 19:28:00 +0530 Subject: [PATCH 20/42] Updated with HashedNets --- tmva/tmva/src/MethodDNN.cxx | 47 ++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/tmva/tmva/src/MethodDNN.cxx b/tmva/tmva/src/MethodDNN.cxx index c16044730c6cc..b24a482c623bf 100644 --- a/tmva/tmva/src/MethodDNN.cxx +++ b/tmva/tmva/src/MethodDNN.cxx @@ -1,5 +1,5 @@ // @(#)root/tmva $Id$ -// Author: Peter Speckmayer +// Authors: Peter Speckmayer, Aditya Sharma /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * @@ -10,7 +10,8 @@ * Description: * * A neural network implementation * * * - * Authors (alphabetical): * + * Authors (alphabetical): + * Aditya Sharma - CERN, Switzerland * Peter Speckmayer - CERN, Switzerland * * * * Copyright (c) 2005-2015: * @@ -79,9 +80,8 @@ ClassImp(TMVA::MethodDNN) TMVA::MethodDNN::MethodDNN( const TString& jobName, const TString& methodTitle, DataSetInfo& theData, - const TString& theOption, - TDirectory* theTargetDir ) - : MethodBase( jobName, Types::kDNN, methodTitle, theData, theOption, theTargetDir ) + const TString& theOption ) + : MethodBase( jobName, Types::kDNN, methodTitle, theData, theOption) , fResume (false) { // standard constructor @@ -89,9 +89,8 @@ TMVA::MethodDNN::MethodDNN( const TString& jobName, //______________________________________________________________________________ TMVA::MethodDNN::MethodDNN( DataSetInfo& theData, - const TString& theWeightFile, - TDirectory* theTargetDir ) - : MethodBase( Types::kDNN, theData, theWeightFile, theTargetDir ) + const TString& theWeightFile) + : MethodBase( Types::kDNN, theData, theWeightFile) , fResume (false) { // constructor from a weight file @@ -549,15 +548,15 @@ void TMVA::MethodDNN::Train() if (trainPattern.empty () || testPattern.empty ()) return; - // create net and weights + // create net and weight bucket fNet.clear (); - fWeights.clear (); + fWeightBucket.clear (); // if "resume" from saved weights if (fResume) { std::cout << ".. 
resume" << std::endl; - // std::tie (fNet, fWeights) = ReadWeights (fFileName); + // std::tie (fNet, fWeightBucket) = ReadWeights (fFileName); } else // initialize weights and net { @@ -601,7 +600,7 @@ void TMVA::MethodDNN::Train() // initialize weights fNet.initializeWeights (fWeightInitializationStrategy, - std::back_inserter (fWeights)); + std::back_inserter (fWeightBucket)); } @@ -637,7 +636,7 @@ void TMVA::MethodDNN::Train() if (ptrSettings->minimizerType () == TMVA::DNN::MinimizerType::fSteepest) { DNN::Steepest minimizer (ptrSettings->learningRate (), ptrSettings->momentum (), ptrSettings->repetitions ()); - /*E =*/fNet.train (fWeights, trainPattern, testPattern, minimizer, *ptrSettings.get ()); + /*E =*/fNet.train (fWeightBucket, trainPattern, testPattern, minimizer, *ptrSettings.get ()); } ptrSettings.reset (); Log () << kINFO << Endl; @@ -652,13 +651,13 @@ void TMVA::MethodDNN::Train() //_______________________________________________________________________ Double_t TMVA::MethodDNN::GetMvaValue( Double_t* /*errLower*/, Double_t* /*errUpper*/ ) { - if (fWeights.empty ()) + if (fWeightBucket.empty ()) return 0.0; const std::vector& inputValues = GetEvent ()->GetValues (); std::vector input (inputValues.begin (), inputValues.end ()); input.push_back (1.0); // bias node - std::vector output = fNet.compute (input, fWeights); + std::vector output = fNet.compute (input, fWeightBucket); if (output.empty ()) return 0.0; @@ -670,8 +669,8 @@ Double_t TMVA::MethodDNN::GetMvaValue( Double_t* /*errLower*/, Double_t* /*errUp const std::vector &TMVA::MethodDNN::GetRegressionValues() { - assert (!fWeights.empty ()); - if (fWeights.empty ()) + assert (!fWeightBucket.empty ()); + if (fWeightBucket.empty ()) return *fRegressionReturnVal; const Event * ev = GetEvent(); @@ -679,7 +678,7 @@ const std::vector &TMVA::MethodDNN::GetRegressionValues() const std::vector& inputValues = ev->GetValues (); std::vector input (inputValues.begin (), inputValues.end ()); input.push_back (1.0); // bias node - std::vector output = fNet.compute (input, fWeights); + std::vector output = fNet.compute (input, fWeightBucket); if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector(); fRegressionReturnVal->clear(); @@ -717,13 +716,13 @@ const std::vector &TMVA::MethodDNN::GetRegressionValues() const std::vector &TMVA::MethodDNN::GetMulticlassValues() { - if (fWeights.empty ()) + if (fWeightBucket.empty ()) return *fRegressionReturnVal; const std::vector& inputValues = GetEvent ()->GetValues (); std::vector input (inputValues.begin (), inputValues.end ()); input.push_back (1.0); // bias node - std::vector output = fNet.compute (input, fWeights); + std::vector output = fNet.compute (input, fWeightBucket); // check the output of the network @@ -787,10 +786,10 @@ void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const void* weightsxml = gTools().xmlengine().NewChild(nn, 0, "Synapses"); gTools().xmlengine().NewAttr (weightsxml, 0, "InputSize", gTools().StringFromInt((int)fNet.inputSize ())); gTools().xmlengine().NewAttr (weightsxml, 0, "OutputSize", gTools().StringFromInt((int)fNet.outputSize ())); - gTools().xmlengine().NewAttr (weightsxml, 0, "NumberSynapses", gTools().StringFromInt((int)fWeights.size ())); + gTools().xmlengine().NewAttr (weightsxml, 0, "NumberSynapses", gTools().StringFromInt(((int)fWeightBucket.size ()) * BUCKET_SIZE); std::stringstream s(""); s.precision( 16 ); - for (std::vector::const_iterator it = fWeights.begin (), itEnd = fWeights.end (); it != itEnd; ++it) + for 
(std::vector::const_iterator it = fWeightBucket.begin (), itEnd = fWeightBucket.end (); it != itEnd; ++it) { s << std::scientific << (*it) << " "; } @@ -857,11 +856,11 @@ void TMVA::MethodDNN::ReadWeightsFromXML( void* wghtnode ) const char* content = gTools().GetContent (xmlWeights); std::stringstream sstr (content); - for (Int_t iWeight = 0; iWeight<numWeights; ++iWeight) + for (Int_t iWeight = 0; iWeight< (numWeights / BUCKET_SIZE; ++iWeight) { // synapses Double_t weight; sstr >> weight; - fWeights.push_back (weight); + fWeightBucket.push_back (weight); } } From 4d9e46cead78e85686fcedcc481c3bda94011e12 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Wed, 24 Aug 2016 17:43:28 +0530 Subject: [PATCH 21/42] Error corrections --- tmva/tmva/inc/TMVA/NeuralNet.h | 54 +++++++++++++++++----------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index 82b5cfb23fe72..0b244b9f69452 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -61,7 +61,7 @@ namespace TMVA // double gaussDoubl (edouble mean, double sigma); - int BUCKET_SIZE = 8; // ------------------------------- Declare Bucket Size -------------------------------------------- + const int BUCKET_SIZE = 8; // ------------------------------- Declare Bucket Size -------------------------------------------- double gaussDouble (double mean, double sigma); double uniformDouble (double minValue, double maxValue); @@ -267,12 +267,12 @@ namespace TMVA - template - void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction); + template + void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc); - template - void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction, ItInverseFunction itInverseFunction, int itGradient, std::vector& gradientBucket); + template + void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, ItInverseFunction itInverseFunction, int itGradient, std::vector& gradientBucket); @@ -390,7 +390,7 @@ namespace TMVA template - double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, InvFnc invFnc, double patternWeight); + double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, ItInvActFnc itInvActFnc, double patternWeight); @@ -644,8 +644,6 @@ namespace TMVA std::vector m_deltaBucket; ///< stores the deltas for the DNN training std::vector m_valueGradientBucket; ///< stores the gradients of the values (nodes) - Net::initializeGradientsDeltas(std::back_inserter (m_valueGradientBucket), std::back_inserter (m_deltaBucket)); // initialize delta and gradient buckets. - std::vector m_values; ///< stores the values of the nodes in this layer const_dropout_iterator m_itDropOut; ///< iterator to a container indicating if the corresponding node is to be dropped bool m_hasDropOut; ///< dropOut is turned on?
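The hunks in this and the neighbouring patches all implement the same HashedNets lookup: instead of storing one double per connection, a layer keeps a small weightBucket, and the integer index that previously walked the full weight vector is hashed into that bucket. The stand-alone sketch below mirrors the weightBucket[hasherFunction(index) % BUCKET_SIZE] pattern visible in the diffs; the helper names hashWeightIndex, fetchWeight and accumulateGradient, the single shared bucket, and the assumption that hasherFunction is xxHash's one-shot XXH32 (pointer, length, seed) call are illustrative, not code taken from the series.

   #include <cstddef>
   #include <vector>
   #include "xxhash.h"

   // Sketch of the hashed-weight lookup: a BUCKET_SIZE-style constant and an
   // arbitrary fixed seed (it must stay the same for every lookup the net makes).
   static const int          kBucketSize = 8;
   static const unsigned int kSeed       = 0;

   // map a "virtual" weight index into the shared bucket
   inline std::size_t hashWeightIndex (int virtualIndex)
   {
      return XXH32 (&virtualIndex, sizeof (virtualIndex), kSeed) % kBucketSize;
   }

   // forward pass: every virtual weight aliases one of the kBucketSize stored doubles
   inline double fetchWeight (const std::vector<double>& weightBucket, int virtualIndex)
   {
      return weightBucket[hashWeightIndex (virtualIndex)];
   }

   // backward pass: virtual weights that collide in a slot share one parameter,
   // so their gradient contributions accumulate into the same slot
   inline void accumulateGradient (std::vector<double>& gradientBucket, int virtualIndex, double grad)
   {
      gradientBucket[hashWeightIndex (virtualIndex)] += grad;
   }

The same arithmetic drives the bookkeeping in the later patches: numWeights () is asserted to equal weightBucket.size () * BUCKET_SIZE, and the XML attribute NumberSynapses is written out as fWeightBucket.size () * BUCKET_SIZE, i.e. each stored double stands in for BUCKET_SIZE virtual weights, and colliding indices read and update one shared parameter.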
@@ -1110,7 +1108,7 @@ namespace TMVA * * */ - template + template void dropOutWeightFactor (std::vector& weightBucket, const DropProbabilities& drops, bool inverse = false); @@ -1151,28 +1149,32 @@ namespace TMVA * \param dropContainer the configuration for DNN drop-out */ template - inline double trainCycle (Minimizer& minimizer, std::vector& weightBucket, + inline double trainCycle (Minimizer& minimizer, std::vector& weightBucket, std::vector& gradientBucket, Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer); + template + void forwardPattern (const LayerContainer& _layers, + std::vector& layerData, std::vector& weightBucket) const; + size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net size_t numNodes (size_t trainingStartLayer = 0) const; ///< returns the number of nodes in this net template - std::vector compute (const std::vector& input, const std::vector& weightBucket) const; ///< compute the net with the given input and the given weights + std::vector compute (const std::vector& input, std::vector& weightBucket) const; ///< compute the net with the given input and the given weights - template - double operator() (PassThrough& settingsAndBatch, const Weights& weights) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients + template + double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients - template - double operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput eFetch, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function + template + double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, ModeOutput /*eFetch*/, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function - template - double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well + template + double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well - template - double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const; + template + double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket, ModeOutput eFetch, OutContainer& outputContainer) const; template @@ -1185,9 +1187,7 @@ namespace TMVA int itGradientEnd, size_t& totalNumWeights) const; - template - void forwardPattern (const LayerContainer& _layers, - std::vector& layerData, std::vector& weightBucket) const; + template @@ -1195,7 +1195,7 @@ namespace TMVA LayerPatternContainer& layerPatternData, std::vector& valuesMean, std::vector& valuesStdDev, - size_t trainFromLayer) const; + size_t trainFromLayer, std::vector& weightBucket, std::vector& gradientBucket) const; template void fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const; @@ -1228,7 +1228,7 @@ namespace TMVA int itWeightBegin, int itWeightEnd, int itGradientBegin, int itGradientEnd, size_t trainFromLayer, 
- OutContainer& outputContainer, bool fetchOutput, std::vector& weightBucket) const; + OutContainer& outputContainer, bool fetchOutput, std::vector& weightBucket, std::vector& gradientBucket) const; @@ -1240,12 +1240,12 @@ namespace TMVA * * */ - template + template double errorFunction (LayerData& layerData, LayerData& nextLayerData, Container truth, - ItWeight itWeight, - ItWeight itWeightEnd, + int itWeight, + int itWeightEnd, double patternWeight, double factorWeightDecay, EnumRegularization eRegularization) const; From 3d7eec35dab02c15858de51593dcbc62a9492b3a Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Wed, 24 Aug 2016 17:55:56 +0530 Subject: [PATCH 22/42] Error corrections NeuralNet.icc --- tmva/tmva/inc/TMVA/NeuralNet.icc | 76 ++++++++++++++++---------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index c49dd1e7962df..cdc29f842b22b 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -15,7 +15,7 @@ namespace TMVA namespace DNN { - +/*! \brief Hash initialization + /*! \brief Hash initialization * * */ @@ -157,14 +157,14 @@ template - void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, int itGradient, std::vector& gradientBucket) + template + void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, ItInverseFunction itInverseFunction, int itGradient, std::vector& gradientBucket) { while (itValue != itValueEnd) { auto& value = (*itValue); value = (*fnc.get ()) (value); - (gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) = (*invFnc.get ()) (value); + (gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) = (*itInverseFunction.get ()) (value); ++itValue; ++itGradient; } @@ -269,7 +269,7 @@ template & weightBucket, std::vector& gradientBucket, PassThrough& passThrough) { size_t numWeights = weightBucket.size () * BUCKET_SIZE; - std::vector gradients (numWeights, 0.0); + // std::vector gradients (numWeights, 0.0); std::vector localWeightBucket (begin (weightBucket), end (weightBucket)); double E = 1e10; @@ -286,7 +286,7 @@ template = m_repetitions) break; - gradients.assign (numWeights, 0.0); + gradientBucket.assign ((numWeights / BUCKET_SIZE), 0.0); // --- nesterov momentum --- // apply momentum before computing the new gradient @@ -299,7 +299,7 @@ template - double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, InvFnc invFnc, double patternWeight) + template + double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, ItInvActFnc itInvActFnc, double patternWeight) { double errorSum = 0.0; @@ -388,7 +388,7 @@ template * the weights have to be adjusted to account for the different number of active nodes * this function computes the factor and applies it to the weights */ - template + template void Net::dropOutWeightFactor (std::vector& weightBucket, const DropProbabilities& drops, bool inverse) @@ -730,8 +730,8 @@ template const std::vector& dropFractions = settings.dropFractions (); bool isWeightsForDrop = false; - std::vector& gradientBucket; - std::vector& deltaBucket; + std::vector gradientBucket; + std::vector deltaBucket; initializeGradientsDeltas(std::back_inserter (gradientBucket), std::back_inserter (deltaBucket)); @@ -766,7 +766,7 @@ template } // execute training cycle - 
trainError = trainCycle (minimizer, weightBucket, begin (trainPattern), end (trainPattern), settings, dropContainer); + trainError = trainCycle (minimizer, weightBucket, gradientBucket, begin (trainPattern), end (trainPattern), settings, dropContainer); // ------ check if we have to execute a test ------------------ @@ -862,7 +862,7 @@ template settings.endTestCycle (); // testError /= weightSum; - settings.computeResult (*this, weights); + settings.computeResult (*this, weightBucket); hasConverged = settings.hasConverged (testError); if (!hasConverged && !isWeightsForDrop) @@ -924,7 +924,7 @@ template * \param dropContainer the data for dropping-out nodes (regularization technique) */ template - inline double Net::trainCycle (Minimizer& minimizer, std::vector& weightBucket, + inline double Net::trainCycle (Minimizer& minimizer, std::vector& weightBucket, std::vector& gradientBucket, Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer) { double error = 0.0; @@ -1021,7 +1021,7 @@ template * \param weights the weight data */ template - std::vector Net::compute (const std::vector& input, const std::vector& weightBucket) const + std::vector Net::compute (const std::vector& input, std::vector& weightBucket) const { std::vector layerData; layerData.reserve (m_layers.size ()+1); @@ -1053,42 +1053,42 @@ template } - template - double Net::operator() (PassThrough& settingsAndBatch, const std::vector& weightBucket) const + template + double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket) const { std::vector nothing; // empty gradients; no backpropagation is done, just forward assert (numWeights () == (weightBucket.size() * BUCKET_SIZE)); - double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, 0, 10000, nothing, false, weightBucket); + double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, 0, 10000, nothing, false, weightBucket, nothing); return error; } - template - double Net::operator() (PassThrough& settingsAndBatch, const std::vector& weightBucket, ModeOutput /*eFetch*/, OutContainer& outputContainer) const + template + double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, ModeOutput /*eFetch*/, OutContainer& outputContainer) const { std::vector nothing; // empty gradients; no backpropagation is done, just forward assert (numWeights () == (weightBucket.size () * BUCKET_SIZE)); - double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, 0, 10000, outputContainer, true, weightBucket); + double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, 0, 10000, outputContainer, true, weightBucket, nothing); return error; } - template + template double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket) const { std::vector nothing; assert (numWeights () == (weightBucket.size () * BUCKET_SIZE)); assert ((weightBucket.size () * BUCKET_SIZE) == (gradientBucket.size () * BUCKET_SIZE)); - double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, (gradientBucket.size () * BUCKET_SIZE) - 1, 0, nothing, false, weightBucket); + double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, (gradientBucket.size () * BUCKET_SIZE) - 1, 0, nothing, false, weightBucket, 
gradientBucket); return error; } - template + template double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket, ModeOutput eFetch, OutContainer& outputContainer) const { MATH_UNUSED(eFetch); assert (numWeights () == weightBucket.size () * BUCKET_SIZE); - assert (weights.size () * BUCKET_SIZE == gradients.size () * BUCKET_SIZE); - double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, (gradientBucket.size () * BUCKET_SIZE) - 1, 0, outputContainer, true, weightBucket); + assert (weightBucket.size () * BUCKET_SIZE == gradientBucket.size () * BUCKET_SIZE); + double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, (gradientBucket.size () * BUCKET_SIZE) - 1, 0, outputContainer, true, weightBucket, gradientBucket); return error; } @@ -1125,7 +1125,7 @@ template typename Pattern::const_iterator itInputBegin; typename Pattern::const_iterator itInputEnd; - + // ItWeight itGammaBegin = itWeightBegin + numWeights (); // ItWeight itBetaBegin = itWeightBegin + numWeights () + numNodes (); @@ -1232,7 +1232,7 @@ template LayerPatternContainer& layerPatternData, std::vector& valuesMean, std::vector& valuesStdDev, - size_t trainFromLayer) const + size_t trainFromLayer, std::vector& weightBucket, std::vector& gradientBucket) const { valuesMean.clear (); valuesStdDev.clear (); @@ -1258,7 +1258,7 @@ template LayerData& currLayerData = currLayerPatternData.at (idxPattern); - forward (prevLayerData, currLayerData); // feed forward + forward (prevLayerData, currLayerData, weightBucket); // feed forward } // ---------------- loop over layerDatas of pattern apply non-linearities ---------------------------- @@ -1269,7 +1269,7 @@ template if (doTraining) applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (), - currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ()); + currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin (), gradientBucket); else applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ()); } @@ -1408,7 +1408,7 @@ template int itWeightBegin, int itWeightEnd, int itGradientBegin, int itGradientEnd, size_t trainFromLayer, - OutContainer& outputContainer, bool doFetchOutput, std::vector& weightBucket) const + OutContainer& outputContainer, bool doFetchOutput, std::vector& weightBucket, std::vector& gradientBucket) const { Settings& settings = std::get<0>(settingsAndBatch); Batch& batch = std::get<1>(settingsAndBatch); @@ -1434,7 +1434,7 @@ template // ---------------------------------- propagate forward ------------------------------------------------------------------ std::vector valuesMean; std::vector valuesStdDev; - forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer); + forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer, weightBucket, gradientBucket); // ------------- fetch output ------------------ @@ -1577,7 +1577,7 @@ template * * */ - template + template double Net::errorFunction (LayerData& layerData, LayerData& nextLayerData, Container truth, From d202ae10fa2a4eb24b70ea26555b66b0b135c8b6 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Wed, 24 Aug 2016 17:58:53 +0530 Subject: [PATCH 23/42] Error corrections MethodDNN.cxx --- tmva/tmva/src/MethodDNN.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/tmva/tmva/src/MethodDNN.cxx b/tmva/tmva/src/MethodDNN.cxx index b24a482c623bf..67525af96fd50 100644 --- a/tmva/tmva/src/MethodDNN.cxx +++ b/tmva/tmva/src/MethodDNN.cxx @@ -786,7 +786,7 @@ void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const void* weightsxml = gTools().xmlengine().NewChild(nn, 0, "Synapses"); gTools().xmlengine().NewAttr (weightsxml, 0, "InputSize", gTools().StringFromInt((int)fNet.inputSize ())); gTools().xmlengine().NewAttr (weightsxml, 0, "OutputSize", gTools().StringFromInt((int)fNet.outputSize ())); - gTools().xmlengine().NewAttr (weightsxml, 0, "NumberSynapses", gTools().StringFromInt(((int)fWeightBucket.size ()) * BUCKET_SIZE); + gTools().xmlengine().NewAttr (weightsxml, 0, "NumberSynapses", gTools().StringFromInt(((int)fWeightBucket.size ()) * TMVA::DNN::BUCKET_SIZE); std::stringstream s(""); s.precision( 16 ); for (std::vector::const_iterator it = fWeightBucket.begin (), itEnd = fWeightBucket.end (); it != itEnd; ++it) @@ -856,7 +856,7 @@ void TMVA::MethodDNN::ReadWeightsFromXML( void* wghtnode ) const char* content = gTools().GetContent (xmlWeights); std::stringstream sstr (content); - for (Int_t iWeight = 0; iWeight< (numWeights / BUCKET_SIZE; ++iWeight) + for (Int_t iWeight = 0; iWeight< (numWeights / TMVA::DNN::BUCKET_SIZE; ++iWeight) { // synapses Double_t weight; sstr >> weight; From 52dc851d34eb022fb9b1530783b5d9ad55aee5c2 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Thu, 25 Aug 2016 15:41:26 +0530 Subject: [PATCH 24/42] Successful compile --- tmva/tmva/inc/TMVA/NeuralNet.icc | 203 +++++++++++++------------------ 1 file changed, 86 insertions(+), 117 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index cdc29f842b22b..265b923339527 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -84,7 +84,7 @@ namespace TMVA * * itDrop correlates with itSourceBegin */ -template +template void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, int itWeight, std::vector& weightBucket, ItTarget itTargetBegin, ItTarget itTargetEnd, @@ -111,11 +111,8 @@ template - void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, - int itWeight, std::vector& weightBucket, - ItPrev itPrevBegin, ItPrev itPrevEnd, - ItDrop itDrop) +template + void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop) { for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev) { @@ -157,14 +154,14 @@ template - void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, ItInverseFunction itInverseFunction, int itGradient, std::vector& gradientBucket) + template + void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, ItInverseFunction invFnc, ItGradient itGradient) { while (itValue != itValueEnd) { auto& value = (*itValue); value = (*fnc.get ()) (value); - (gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) = (*itInverseFunction.get ()) (value); + (*itGradient) = (*invFnc.get ()) (value); ++itValue; ++itGradient; } @@ -176,20 +173,19 @@ template + template void update (ItSource itSource, ItSource itSourceEnd, - int itTargetDeltaBegin, int itTargetDeltaEnd, std::vector& deltaBucket, - int itTargetGradientBegin, - int itGradient, - std::vector& gradientBucket) + ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, + ItTargetGradient itTargetGradientBegin, + int itGradient, std::vector& gradientBucket) { while (itSource != itSourceEnd) { - int itTargetDelta 
= itTargetDeltaBegin; - int itTargetGradient = itTargetGradientBegin; + auto itTargetDelta = itTargetDeltaBegin; + auto itTargetGradient = itTargetGradientBegin; while (itTargetDelta != itTargetDeltaEnd) { - (gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) += - (deltaBucket[hasherFunction(itTargetDelta) % BUCKET_SIZE]) * (*itSource) * (gradientBucket[hasherFunction(itTargetGradient) % BUCKET_SIZE]); + (gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient); ++itTargetDelta; ++itTargetGradient; ++itGradient; } ++itSource; @@ -231,21 +227,21 @@ template - void update (ItSource itSource, ItSource itSourceEnd, - int itTargetDeltaBegin, int itTargetDeltaEnd, std::vector& deltaBucket, - int itTargetGradientBegin, - int itGradient, std::vector& gradientBucket, - int itWeight, std::vector& weightBucket, double weightDecay) + template + void update (ItSource itSource, ItSource itSourceEnd, + ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, + ItTargetGradient itTargetGradientBegin, + int itGradient, std::vector& gradientBucket, + int itWeight, std::vector& weightBucket, double& weightDecay) { // ! the factor weightDecay has to be already scaled by 1/n where n is the number of weights while (itSource != itSourceEnd) { - int itTargetDelta = itTargetDeltaBegin; - int itTargetGradient = itTargetGradientBegin; + auto itTargetDelta = itTargetDeltaBegin; + auto itTargetGradient = itTargetGradientBegin; while (itTargetDelta != itTargetDeltaEnd) { - gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE] -= + deltaBucket[hasherFunction(itTargetDelta) % BUCKET_SIZE] * (*itSource) * gradientBucket[hasherFunction(itTargetGradient) % BUCKET_SIZE] + computeRegularization(weightBucket[hasherFunction(itWeight) % BUCKET_SIZE],weightDecay); + (gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization(weightBucket[hasherFunction(itWeight) % BUCKET_SIZE],weightDecay); ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight; } ++itSource; @@ -256,7 +252,6 @@ template - double Steepest::operator() (Function& fitnessFunction, std::vector& weightBucket, std::vector& gradientBucket, PassThrough& passThrough) + template + double Steepest::operator() (Function& fitnessFunction, std::vector& weightBucket, PassThrough& passThrough) { size_t numWeights = weightBucket.size () * BUCKET_SIZE; // std::vector gradients (numWeights, 0.0); + std::vector gradientBucket (weightBucket.size (), 0.0); std::vector localWeightBucket (begin (weightBucket), end (weightBucket)); double E = 1e10; @@ -286,7 +282,7 @@ template = m_repetitions) break; - gradientBucket.assign ((numWeights / BUCKET_SIZE), 0.0); + gradientBucket.assign (weightBucket.size (), 0.0); // --- nesterov momentum --- // apply momentum before computing the new gradient @@ -337,10 +333,10 @@ template - double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, ItInvActFnc itInvActFnc, double patternWeight) + template + double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight) { double errorSum = 0.0; @@ -383,12 +379,12 @@ template - double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int 
itDeltaEnd, std::vector& deltaBucket, ItInvActFnc /*itInvActFnc*/, double patternWeight) + double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight) { bool hasDeltas = (itDelta != itDeltaEnd); @@ -419,8 +415,8 @@ template - double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, ItInvActFnc /*itInvActFnc*/, double patternWeight) + double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight) { double errorSum = 0.0; @@ -459,13 +455,13 @@ template + double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization) { if (eRegularization == EnumRegularization::L1) @@ -594,36 +590,42 @@ template * */ template - void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization) + void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket) { // ! the "factorWeightDecay" has already to be scaled by 1/n where n is the number of weights if (factorWeightDecay != 0.0) // has weight regularization if (regularization == EnumRegularization::L1) // L1 regularization ( sum(|w|) ) { - update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (), - currLayerData.weightsBegin (), factorWeightDecay); + update (prevLayerData.valuesBegin (),prevLayerData.valuesEnd (), + currLayerData.deltasBegin (), + currLayerData.deltasEnd (), + currLayerData.valueGradientsBegin (), + currLayerData.gradientsBegin (), gradientBucket, + currLayerData.weightsBegin (), weightBucket, factorWeightDecay); } else if (regularization == EnumRegularization::L2) // L2 regularization ( sum(w^2) ) { - update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (), - currLayerData.weightsBegin (), factorWeightDecay); + update (prevLayerData.valuesBegin (),prevLayerData.valuesEnd (), + currLayerData.deltasBegin (), + currLayerData.deltasEnd (), + currLayerData.valueGradientsBegin (), + currLayerData.gradientsBegin (), gradientBucket, + currLayerData.weightsBegin (), weightBucket, factorWeightDecay); } else { update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ()); + currLayerData.valueGradientsBegin (), + currLayerData.gradientsBegin (), gradientBucket); } else { // no weight regularization update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ()); + currLayerData.deltasBegin (), currLayerData.deltasEnd (), + currLayerData.valueGradientsBegin (), + 
currLayerData.gradientsBegin (), gradientBucket); } } @@ -730,11 +732,6 @@ template const std::vector& dropFractions = settings.dropFractions (); bool isWeightsForDrop = false; - std::vector gradientBucket; - std::vector deltaBucket; - - initializeGradientsDeltas(std::back_inserter (gradientBucket), std::back_inserter (deltaBucket)); - // until convergence do { @@ -766,7 +763,7 @@ template } // execute training cycle - trainError = trainCycle (minimizer, weightBucket, gradientBucket, begin (trainPattern), end (trainPattern), settings, dropContainer); + trainError = trainCycle (minimizer, weightBucket, begin (trainPattern), end (trainPattern), settings, dropContainer); // ------ check if we have to execute a test ------------------ @@ -807,7 +804,7 @@ template { std::vector localOutput; pass_through_type passThrough (settings, batch, dropContainerTest); - double testBatchError = (*this) (passThrough, weightBucket, gradientBucket, ModeOutput::FETCH, localOutput); + double testBatchError = (*this) (passThrough, weightBucket, ModeOutput::FETCH, localOutput); return std::make_tuple (testBatchError, localOutput); }) ); @@ -924,7 +921,7 @@ template * \param dropContainer the data for dropping-out nodes (regularization technique) */ template - inline double Net::trainCycle (Minimizer& minimizer, std::vector& weightBucket, std::vector& gradientBucket, + inline double Net::trainCycle (Minimizer& minimizer, std::vector& weightBucket, Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer) { double error = 0.0; @@ -985,7 +982,7 @@ template { Batch& batch = *it; pass_through_type settingsAndBatch (settings, batch, dropContainer); - localError += minimizer ((*this), weightBucket, gradientBucket, settingsAndBatch); /// call the minimizer + localError += minimizer ((*this), weightBucket, settingsAndBatch); /// call the minimizer } return localError; }) @@ -1000,7 +997,7 @@ template for (auto& batch : batches) { std::tuple settingsAndBatch (settings, batch, dropContainer); - error += minimizer ((*this), weightBucket, gradientBucket, settingsAndBatch); + error += minimizer ((*this), weightBucket, settingsAndBatch); } } @@ -1020,7 +1017,7 @@ template * \param input the input data * \param weights the weight data */ - template + std::vector Net::compute (const std::vector& input, std::vector& weightBucket) const { std::vector layerData; @@ -1072,7 +1069,7 @@ template } - template + template double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket) const { std::vector nothing; @@ -1082,7 +1079,7 @@ template return error; } - template + template double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket, ModeOutput eFetch, OutContainer& outputContainer) const { MATH_UNUSED(eFetch); @@ -1094,7 +1091,7 @@ template - template + template std::vector> Net::prepareLayerData (LayerContainer& _layers, Batch& batch, const DropContainer& dropContainer, @@ -1232,7 +1229,7 @@ template LayerPatternContainer& layerPatternData, std::vector& valuesMean, std::vector& valuesStdDev, - size_t trainFromLayer, std::vector& weightBucket, std::vector& gradientBucket) const + size_t trainFromLayer, std::vector& weightBucket) const { valuesMean.clear (); valuesStdDev.clear (); @@ -1269,7 +1266,7 @@ template if (doTraining) applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (), - currLayerData.inverseActivationFunction (), 
currLayerData.valueGradientsBegin (), gradientBucket); + currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ()); else applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ()); } @@ -1312,12 +1309,10 @@ template - template + std::tuple Net::computeError (const Settings& settings, std::vector& lastLayerData, - Batch& batch, - int itWeightBegin, - int itWeightEnd) const + Batch& batch, std::vector& weightBucket) const { typename std::vector::iterator itLayerData = lastLayerData.begin (); @@ -1341,9 +1336,8 @@ template LayerData& layerData = (*itLayerData); LayerData& nextLayerData = (*itLayerDataNext); const Pattern& _pattern = (*itPattern); - double error = errorFunction (layerData, nextLayerData, _pattern.output (), - itWeightBegin, itWeightEnd, - _pattern.weight (), settings.factorWeightDecay (), + double error = errorFunction (layerData, nextLayerData, _pattern.output (), + _pattern.weight (), weightBucket, settings.factorWeightDecay (), settings.regularization ()); sumWeights += fabs (_pattern.weight ()); sumError += error; @@ -1354,7 +1348,7 @@ template template - void Net::backPropagate (std::vector>& layerPatternData, std::vector& weightBucket, + void Net::backPropagate (std::vector>& layerPatternData, std::vector& weightBucket, std::vector& gradientBucket, const Settings& settings, size_t trainFromLayer, size_t totalNumWeights) const @@ -1391,7 +1385,7 @@ template // // L1 : -factorWeightDecay*sgn(w)/numWeights // L2 : -factorWeightDecay/numWeights - update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ()); + update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization (), weightBucket, gradientBucket); } } } @@ -1403,7 +1397,7 @@ template * * */ - template + template double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch, int itWeightBegin, int itWeightEnd, int itGradientBegin, int itGradientEnd, @@ -1434,7 +1428,7 @@ template // ---------------------------------- propagate forward ------------------------------------------------------------------ std::vector valuesMean; std::vector valuesStdDev; - forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer, weightBucket, gradientBucket); + forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer, weightBucket); // ------------- fetch output ------------------ @@ -1445,16 +1439,16 @@ template // ------------- error computation ------------- - std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights); + std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, weightBucket); // ------------- backpropagation ------------- - backPropagate (layerPatternData, weightBucket, settings, trainFromLayer, totalNumWeights); + backPropagate (layerPatternData, weightBucket, gradientBucket, settings, trainFromLayer, totalNumWeights); // --- compile the measures double batchSize = std::distance (std::begin (batch), std::end (batch)); - for (auto it = itGradientBegin; it != itGradientEnd; ++it) + for (auto it = gradientBucket.begin(); it != gradientBucket.end(); ++it) (*it) /= batchSize; @@ -1581,9 +1575,8 @@ template double Net::errorFunction (LayerData& layerData, LayerData& nextLayerData, Container truth, - int itWeight, - int itWeightEnd, double patternWeight, + std::vector& 
weightBucket, double factorWeightDecay, EnumRegularization eRegularization) const { @@ -1593,7 +1586,7 @@ template case ModeErrorFunction::SUMOFSQUARES: { error = sumOfSquares (layerData.valuesBegin (), layerData.valuesEnd (), begin (truth), end (truth), - layerData.deltasBegin (), layerData.deltasEnd (), layerData.getDeltaBucket(), + layerData.deltasBegin (), layerData.deltasEnd (), layerData.inverseActivationFunction (), patternWeight); break; @@ -1604,7 +1597,7 @@ template std::vector probabilities = layerData.probabilities (); error = crossEntropy (begin (probabilities), end (probabilities), begin (truth), end (truth), - layerData.deltasBegin (), layerData.deltasEnd (), layerData.getDeltaBucket(), + layerData.deltasBegin (), layerData.deltasEnd (), layerData.inverseActivationFunction (), patternWeight); break; @@ -1615,7 +1608,7 @@ template std::vector probabilities = layerData.probabilities (); error = softMaxCrossEntropy (begin (probabilities), end (probabilities), begin (truth), end (truth), - layerData.deltasBegin (), layerData.deltasEnd (), layerData.getDeltaBucket(), + layerData.deltasBegin (), layerData.deltasEnd (), layerData.inverseActivationFunction (), patternWeight); break; @@ -1623,7 +1616,7 @@ template } if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE) { - error = weightDecay (error, layerData.weightsBegin (), nextLayerData.weightsBegin (), factorWeightDecay, eRegularization); + error = weightDecay (error, layerData.weightsBegin (), nextLayerData.weightsBegin (), weightBucket, factorWeightDecay, eRegularization); } return error; } @@ -1746,30 +1739,6 @@ template -/*! \brief initialization of the deltas and gradients - * - * - */ - template - void Net::initializeGradientsDeltas (OutIterator itGradient, OutIterator itDelta) - { - // input and output properties - int numInput = inputSize (); - - - // initialize the deltas and gradients - for (auto& layer: layers ()) - { - for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) - { - (*itGradient) = 0.0; - (*itDelta) = 0.0; - ++itGradient; itDelta++; - } - numInput = layer.numNodes (); - } - return; - } From 70484aa0e69649bd043df8ebb29d28cb3005d976 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Thu, 25 Aug 2016 15:42:43 +0530 Subject: [PATCH 25/42] Successful compile --- tmva/tmva/inc/TMVA/NeuralNet.h | 101 +++++++++++++++------------------ 1 file changed, 47 insertions(+), 54 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index 0b244b9f69452..d022522b5ec70 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -255,13 +255,13 @@ namespace TMVA - template + template void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, int itWeight, std::vector& weightBucket, ItTarget itTargetBegin, ItTarget itTargetEnd); - template - void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd); + template + void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop); @@ -271,25 +271,25 @@ namespace TMVA void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc); - template - void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, ItInverseFunction itInverseFunction, int itGradient, std::vector& gradientBucket); + template + void applyFunctions (ItValue itValue, 
ItValue itValueEnd, Fnc fnc, ItInverseFunction invFnc, ItGradient itGradient); - template + template void update (ItSource itSource, ItSource itSourceEnd, - int itTargetDeltaBegin, int itTargetDeltaEnd, std::vector& deltaBucket, - int itTargetGradientBegin, + ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, + ItTargetGradient itTargetGradientBegin, int itGradient, std::vector& gradientBucket); - template + template void update (ItSource itSource, ItSource itSourceEnd, - int itTargetDeltaBegin, int itTargetDeltaEnd, std::vector& deltaBucket, - int itTargetGradientBegin, - int itGradient, std::vector& gradientBucket, - int itWeight, std::vector& weightBucket, double weightDecay); + ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, + ItTargetGradient itTargetGradientBegin, + int itGradient, std::vector& gradientBucket, + int itWeight, std::vector& weightBucket, double& weightDecay); @@ -363,8 +363,8 @@ namespace TMVA * is not touched by the minimizer; This object is provided to the fitness function when * called */ - template - double operator() (Function& fitnessFunction, std::vector& weightBucket, std::vector& gradientBucket, PassThrough& passThrough); + template + double operator() (Function& fitnessFunction, std::vector& weightBucket, PassThrough& passThrough); double m_alpha; ///< internal parameter (learningRate) @@ -389,25 +389,25 @@ namespace TMVA - template - double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, ItInvActFnc itInvActFnc, double patternWeight); + template + double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight); template - double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, ItInvActFnc /*itInvActFnc*/, double patternWeight); + double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight); template - double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, int itDelta, int itDeltaEnd, std::vector& deltaBucket, ItInvActFnc /*itInvActFnc*/, double patternWeight); + double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight); + - template double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization); @@ -515,8 +515,8 @@ namespace TMVA : m_size (other.m_size) , m_itInputBegin (other.m_itInputBegin) , m_itInputEnd (other.m_itInputEnd) - , m_deltaBucket (other.m_deltaBucket) - , m_valueGradientBucket (other.m_valueGradientBucket) + , m_deltas (other.m_deltas) + , m_valueGradients (other.m_valueGradients) , m_values (other.m_values) , m_itDropOut (other.m_itDropOut) , m_hasDropOut (other.m_hasDropOut) @@ -538,8 +538,8 @@ namespace TMVA : m_size (other.m_size) , m_itInputBegin (other.m_itInputBegin) , m_itInputEnd (other.m_itInputEnd) - , m_deltaBucket (std::move(other.m_deltaBucket)) - , 
m_valueGradientBucket (std::move(other.m_valueGradientBucket)) + , m_deltas (std::move(other.m_deltas)) + , m_valueGradients (std::move(other.m_valueGradients)) , m_values (std::move(other.m_values)) , m_itDropOut (other.m_itDropOut) , m_hasDropOut (other.m_hasDropOut) @@ -575,7 +575,7 @@ namespace TMVA void clear () { m_values.assign (m_values.size (), 0.0); - m_deltaBucket.assign (m_deltaBucket.size (), 0.0); + m_deltas.assign (m_deltas.size (), 0.0); } const_iterator_type valuesBegin () const { return m_isInputLayer ? m_itInputBegin : begin (m_values); } ///< returns const iterator to the begin of the (node) values @@ -587,24 +587,22 @@ namespace TMVA ModeOutputValues outputMode () const { return m_eModeOutput; } ///< returns the output mode container_type probabilities () const { return computeProbabilities (); } ///< computes the probabilities from the current node values and returns them - int deltasBegin () { return m_itGradientBegin; } ///< returns iterator to the begin of the deltas (back-propagation) - int deltasEnd () { return (m_deltaBucket.size() * BUCKET_SIZE + m_itGradientBegin); } ///< returns iterator to the end of the deltas (back-propagation) + iterator_type deltasBegin () { return begin (m_deltas); } ///< returns iterator to the begin of the deltas (back-propagation) + iterator_type deltasEnd () { return end (m_deltas); } ///< returns iterator to the end of the deltas (back-propagation) - int deltasBegin () const { return m_itGradientBegin; } ///< returns const int to the begin of the deltas (back-propagation) - int deltasEnd () const { return (m_deltaBucket.size() * BUCKET_SIZE + m_itGradientBegin); } ///< returns const int to the end of the deltas (back-propagation) + const_iterator_type deltasBegin () const { return begin (m_deltas); } ///< returns const iterator to the begin of the deltas (back-propagation) + const_iterator_type deltasEnd () const { return end (m_deltas); } ///< returns const iterator to the end of the deltas (back-propagation) - int valueGradientsBegin () { return m_itGradientBegin; } ///< returns int to the begin of the gradients of the node values - int valueGradientsEnd () { return (m_valueGradientBucket.size() * BUCKET_SIZE + m_itGradientBegin); } ///< returns int to the end of the gradients of the node values + iterator_type valueGradientsBegin () { return begin (m_valueGradients); } ///< returns iterator to the begin of the gradients of the node values + iterator_type valueGradientsEnd () { return end (m_valueGradients); } ///< returns iterator to the end of the gradients of the node values - int valueGradientsBegin () const { return m_itGradientBegin; } ///< returns const int to the begin of the gradients - int valueGradientsEnd () const { return (m_valueGradientBucket.size() * BUCKET_SIZE + m_itGradientBegin); } ///< returns const int to the end of the gradients + const_iterator_type valueGradientsBegin () const { return begin (m_valueGradients); } ///< returns const iterator to the begin of the gradients + const_iterator_type valueGradientsEnd () const { return end (m_valueGradients); } ///< returns const iterator to the end of the gradients int gradientsBegin () { assert (m_hasGradients); return m_itGradientBegin; } ///< returns iterator to the begin of the gradients int gradientsBegin () const { assert (m_hasGradients); return m_itGradientBegin; } ///< returns const iterator to the begin of the gradients int weightsBegin () const { assert (m_hasWeights); return m_itConstWeightBegin; } ///< returns const iterator to the begin of the 
weights for this layer - std::vector getGradientBucket () { return m_valueGradientBucket; } // returns gradient bucket. - std::vector getDeltaBucket () { return m_deltaBucket; } // returns gradient bucket. std::shared_ptr> activationFunction () const { return m_activationFunction; } std::shared_ptr> inverseActivationFunction () const { return m_inverseActivationFunction; } @@ -641,8 +639,8 @@ namespace TMVA const_iterator_type m_itInputBegin; ///< iterator to the first of the nodes in the input node vector const_iterator_type m_itInputEnd; ///< iterator to the end of the nodes in the input node vector - std::vector m_deltaBucket; ///< stores the deltas for the DNN training - std::vector m_valueGradientBucket; ///< stores the gradients of the values (nodes) + std::vector m_deltas; ///< stores the deltas for the DNN training + std::vector m_valueGradients; ///< stores the gradients of the values (nodes) std::vector m_values; ///< stores the values of the nodes in this layer const_dropout_iterator m_itDropOut; ///< iterator to a container indicating if the corresponding node is to be dropped @@ -724,7 +722,7 @@ namespace TMVA template - void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double weightDecay, EnumRegularization regularization); + void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double weightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket); @@ -1149,7 +1147,7 @@ namespace TMVA * \param dropContainer the configuration for DNN drop-out */ template - inline double trainCycle (Minimizer& minimizer, std::vector& weightBucket, std::vector& gradientBucket, + inline double trainCycle (Minimizer& minimizer, std::vector& weightBucket, Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer); @@ -1161,7 +1159,7 @@ namespace TMVA size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net size_t numNodes (size_t trainingStartLayer = 0) const; ///< returns the number of nodes in this net - template + std::vector compute (const std::vector& input, std::vector& weightBucket) const; ///< compute the net with the given input and the given weights template @@ -1170,14 +1168,14 @@ namespace TMVA template double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, ModeOutput /*eFetch*/, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function - template + template double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well - template + template double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket, ModeOutput eFetch, OutContainer& outputContainer) const; - template + template std::vector> prepareLayerData (LayerContainer& layers, Batch& batch, const DropContainer& dropContainer, @@ -1195,7 +1193,7 @@ namespace TMVA LayerPatternContainer& layerPatternData, std::vector& valuesMean, std::vector& valuesStdDev, - size_t trainFromLayer, std::vector& weightBucket, std::vector& gradientBucket) const; + size_t trainFromLayer, std::vector& weightBucket) const; template void fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const; @@ -1204,15 +1202,13 @@ namespace TMVA void fetchOutput (const std::vector& layerPatternData, 
OutputContainer& outputContainer) const; - template + std::tuple computeError (const Settings& settings, std::vector& lastLayerData, - Batch& batch, - int itWeightBegin, - int itWeightEnd) const; + Batch& batch, std::vector& weightBucket) const; template - void backPropagate (std::vector>& layerPatternData, std::vector& weightBucket, + void backPropagate (std::vector>& layerPatternData, std::vector& weightBucket, std::vector& gradientBucket, const Settings& settings, size_t trainFromLayer, size_t totalNumWeights) const; @@ -1223,7 +1219,7 @@ namespace TMVA * * */ - template + template double forward_backward (LayerContainer& layers, PassThrough& settingsAndBatch, int itWeightBegin, int itWeightEnd, int itGradientBegin, int itGradientEnd, @@ -1244,9 +1240,8 @@ namespace TMVA double errorFunction (LayerData& layerData, LayerData& nextLayerData, Container truth, - int itWeight, - int itWeightEnd, double patternWeight, + std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization) const; @@ -1268,8 +1263,6 @@ namespace TMVA void initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight); ///< initialize the weights with the given strategy - template - void initializeGradientsDeltas (OutIterator itGradient, OutIterator itDelta); ///< initialize the weights with the given strategy protected: From 6a0932f7ec41b9af5c5f9dbf94534b6dee21c08a Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Thu, 25 Aug 2016 15:43:44 +0530 Subject: [PATCH 26/42] Successful compile --- tmva/tmva/src/NeuralNet.cxx | 875 ++++++++++++++++++------------------ 1 file changed, 445 insertions(+), 430 deletions(-) diff --git a/tmva/tmva/src/NeuralNet.cxx b/tmva/tmva/src/NeuralNet.cxx index a8394f649ecc1..cc1f221959aab 100644 --- a/tmva/tmva/src/NeuralNet.cxx +++ b/tmva/tmva/src/NeuralNet.cxx @@ -5,193 +5,198 @@ namespace TMVA { - namespace DNN - { + namespace DNN + { - double gaussDouble (double mean, double sigma) - { - static std::default_random_engine generator; - std::normal_distribution distribution (mean, sigma); - return distribution (generator); - } + double gaussDouble (double mean, double sigma) + { + static std::default_random_engine generator; + std::normal_distribution distribution (mean, sigma); + return distribution (generator); + } - double uniformDouble (double minValue, double maxValue) - { - static std::default_random_engine generator; - std::uniform_real_distribution distribution(minValue, maxValue); - return distribution(generator); - } + double uniformDouble (double minValue, double maxValue) + { + static std::default_random_engine generator; + std::uniform_real_distribution distribution(minValue, maxValue); + return distribution(generator); + } - int randomInt (int maxValue) - { - static std::default_random_engine generator; - std::uniform_int_distribution distribution(0,maxValue-1); - return distribution(generator); - } - - - double studenttDouble (double distributionParameter) - { - static std::default_random_engine generator; - std::student_t_distribution distribution (distributionParameter); - return distribution (generator); - } - - - LayerData::LayerData (size_t inputSize) - : m_isInputLayer (true) - , m_hasWeights (false) - , m_hasGradients (false) - , m_eModeOutput (ModeOutputValues::DIRECT) - { - m_size = inputSize; - m_deltas.assign (m_size, 0); - } - - - - LayerData::LayerData (const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput) - : m_isInputLayer (true) - , m_hasWeights (false) - , 
m_hasGradients (false) - , m_eModeOutput (eModeOutput) - { - m_itInputBegin = itInputBegin; - m_itInputEnd = itInputEnd; - m_size = std::distance (itInputBegin, itInputEnd); - m_deltas.assign (m_size, 0); - } - - - - - LayerData::LayerData (size_t _size, - const_iterator_type itWeightBegin, - iterator_type itGradientBegin, - std::shared_ptr> _activationFunction, - std::shared_ptr> _inverseActivationFunction, - ModeOutputValues eModeOutput) - : m_size (_size) - , m_itConstWeightBegin (itWeightBegin) - , m_itGradientBegin (itGradientBegin) - , m_activationFunction (_activationFunction) - , m_inverseActivationFunction (_inverseActivationFunction) - , m_isInputLayer (false) - , m_hasWeights (true) - , m_hasGradients (true) - , m_eModeOutput (eModeOutput) - { - m_values.assign (_size, 0); - m_deltas.assign (_size, 0); - m_valueGradients.assign (_size, 0); - } - - - - - LayerData::LayerData (size_t _size, const_iterator_type itWeightBegin, - std::shared_ptr> _activationFunction, - ModeOutputValues eModeOutput) - : m_size (_size) - , m_itConstWeightBegin (itWeightBegin) - , m_activationFunction (_activationFunction) - , m_isInputLayer (false) - , m_hasWeights (true) - , m_hasGradients (false) - , m_eModeOutput (eModeOutput) - { - m_values.assign (_size, 0); - } - - - - typename LayerData::container_type LayerData::computeProbabilities () - { - container_type probabilitiesContainer; - if (TMVA::DNN::isFlagSet (ModeOutputValues::SIGMOID, m_eModeOutput)) + int randomInt (int maxValue) + { + static std::default_random_engine generator; + std::uniform_int_distribution distribution(0,maxValue-1); + return distribution(generator); + } + + + double studenttDouble (double distributionParameter) + { + static std::default_random_engine generator; + std::student_t_distribution distribution (distributionParameter); + return distribution (generator); + } + + + LayerData::LayerData (size_t inputSize) + : m_hasDropOut (false) + , m_isInputLayer (true) + , m_hasWeights (false) + , m_hasGradients (false) + , m_eModeOutput (ModeOutputValues::DIRECT) + { + m_size = inputSize; + m_deltas.assign (m_size, 0); + } + + + + LayerData::LayerData (const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput) + : m_hasDropOut (false) + , m_isInputLayer (true) + , m_hasWeights (false) + , m_hasGradients (false) + , m_eModeOutput (eModeOutput) + { + m_itInputBegin = itInputBegin; + m_itInputEnd = itInputEnd; + m_size = std::distance (itInputBegin, itInputEnd); + m_deltas.assign (m_size, 0); + } + + + + + LayerData::LayerData (size_t _size, + int itWeightBegin, + int itGradientBegin, + std::shared_ptr> _activationFunction, + std::shared_ptr> _inverseActivationFunction, + ModeOutputValues eModeOutput) + : m_size (_size) + , m_hasDropOut (false) + , m_itConstWeightBegin (itWeightBegin) + , m_itGradientBegin (itGradientBegin) + , m_activationFunction (_activationFunction) + , m_inverseActivationFunction (_inverseActivationFunction) + , m_isInputLayer (false) + , m_hasWeights (true) + , m_hasGradients (true) + , m_eModeOutput (eModeOutput) + { + m_values.assign (_size, 0); + m_deltas.assign (_size, 0); + m_valueGradients.assign (_size, 0); + } + + + + + LayerData::LayerData (size_t _size, int itWeightBegin, + std::shared_ptr> _activationFunction, + ModeOutputValues eModeOutput) + : m_size (_size) + , m_hasDropOut (false) + , m_itConstWeightBegin (itWeightBegin) + , m_activationFunction (_activationFunction) + , m_inverseActivationFunction () + , m_isInputLayer (false) + , m_hasWeights (true) + , 
m_hasGradients (false) + , m_eModeOutput (eModeOutput) + { + m_values.assign (_size, 0); + } + + + + typename LayerData::container_type LayerData::computeProbabilities () const + { + container_type probabilitiesContainer; + if (TMVA::DNN::isFlagSet (ModeOutputValues::SIGMOID, m_eModeOutput)) { - std::transform (begin (m_values), end (m_values), std::back_inserter (probabilitiesContainer), (*Sigmoid.get ())); + std::transform (begin (m_values), end (m_values), std::back_inserter (probabilitiesContainer), (*Sigmoid.get ())); } - else if (TMVA::DNN::isFlagSet (ModeOutputValues::SOFTMAX, m_eModeOutput)) + else if (TMVA::DNN::isFlagSet (ModeOutputValues::SOFTMAX, m_eModeOutput)) { - double sum = 0; - probabilitiesContainer = m_values; - std::for_each (begin (probabilitiesContainer), end (probabilitiesContainer), [&sum](double& p){ p = std::exp (p); sum += p; }); - if (sum != 0) - std::for_each (begin (probabilitiesContainer), end (probabilitiesContainer), [sum ](double& p){ p /= sum; }); + double sum = 0; + probabilitiesContainer = m_values; + std::for_each (begin (probabilitiesContainer), end (probabilitiesContainer), [&sum](double& p){ p = std::exp (p); sum += p; }); + if (sum != 0) + std::for_each (begin (probabilitiesContainer), end (probabilitiesContainer), [sum ](double& p){ p /= sum; }); } - else + else { - probabilitiesContainer.assign (begin (m_values), end (m_values)); + probabilitiesContainer.assign (begin (m_values), end (m_values)); } - return probabilitiesContainer; - } + return probabilitiesContainer; + } - Layer::Layer (size_t _numNodes, EnumFunction _activationFunction, ModeOutputValues eModeOutputValues) - : m_numNodes (_numNodes) - , m_eModeOutputValues (eModeOutputValues) - , m_activationFunctionType (_activationFunction) - { - for (size_t iNode = 0; iNode < _numNodes; ++iNode) + Layer::Layer (size_t _numNodes, EnumFunction _activationFunction, ModeOutputValues eModeOutputValues) + : m_numNodes (_numNodes) + , m_eModeOutputValues (eModeOutputValues) + , m_activationFunctionType (_activationFunction) + { + for (size_t iNode = 0; iNode < _numNodes; ++iNode) { - auto actFnc = Linear; - auto invActFnc = InvLinear; - switch (_activationFunction) - { - case EnumFunction::ZERO: - actFnc = ZeroFnc; - invActFnc = ZeroFnc; - break; - case EnumFunction::LINEAR: - actFnc = Linear; - invActFnc = InvLinear; - break; - case EnumFunction::TANH: - actFnc = Tanh; - invActFnc = InvTanh; - break; - case EnumFunction::RELU: - actFnc = ReLU; - invActFnc = InvReLU; - break; - case EnumFunction::SYMMRELU: - actFnc = SymmReLU; - invActFnc = InvSymmReLU; - break; - case EnumFunction::TANHSHIFT: - actFnc = TanhShift; - invActFnc = InvTanhShift; - break; - case EnumFunction::SOFTSIGN: - actFnc = SoftSign; - invActFnc = InvSoftSign; - break; - case EnumFunction::SIGMOID: - actFnc = Sigmoid; - invActFnc = InvSigmoid; - break; - case EnumFunction::GAUSS: - actFnc = Gauss; - invActFnc = InvGauss; - break; - case EnumFunction::GAUSSCOMPLEMENT: - actFnc = GaussComplement; - invActFnc = InvGaussComplement; - break; - } - m_activationFunction = actFnc; - m_inverseActivationFunction = invActFnc; + auto actFnc = Linear; + auto invActFnc = InvLinear; + switch (_activationFunction) + { + case EnumFunction::ZERO: + actFnc = ZeroFnc; + invActFnc = ZeroFnc; + break; + case EnumFunction::LINEAR: + actFnc = Linear; + invActFnc = InvLinear; + break; + case EnumFunction::TANH: + actFnc = Tanh; + invActFnc = InvTanh; + break; + case EnumFunction::RELU: + actFnc = ReLU; + invActFnc = InvReLU; + break; + case 
EnumFunction::SYMMRELU: + actFnc = SymmReLU; + invActFnc = InvSymmReLU; + break; + case EnumFunction::TANHSHIFT: + actFnc = TanhShift; + invActFnc = InvTanhShift; + break; + case EnumFunction::SOFTSIGN: + actFnc = SoftSign; + invActFnc = InvSoftSign; + break; + case EnumFunction::SIGMOID: + actFnc = Sigmoid; + invActFnc = InvSigmoid; + break; + case EnumFunction::GAUSS: + actFnc = Gauss; + invActFnc = InvGauss; + break; + case EnumFunction::GAUSSCOMPLEMENT: + actFnc = GaussComplement; + invActFnc = InvGaussComplement; + break; + } + m_activationFunction = actFnc; + m_inverseActivationFunction = invActFnc; } - } + } @@ -202,40 +207,38 @@ namespace TMVA - Settings::Settings (TString name, - size_t _convergenceSteps, size_t _batchSize, size_t _testRepetitions, - double _factorWeightDecay, EnumRegularization eRegularization, - MinimizerType _eMinimizerType, double _learningRate, - double _momentum, int _repetitions, bool _useMultithreading, - bool _doBatchNormalization) - : m_timer (100, name) - , m_minProgress (0) - , m_maxProgress (100) - , m_convergenceSteps (_convergenceSteps) - , m_batchSize (_batchSize) - , m_testRepetitions (_testRepetitions) - , m_factorWeightDecay (_factorWeightDecay) - , count_E (0) - , count_dE (0) - , count_mb_E (0) - , count_mb_dE (0) - , m_regularization (eRegularization) - , fLearningRate (_learningRate) - , fMomentum (_momentum) - , fRepetitions (_repetitions) - , fMinimizerType (_eMinimizerType) - , m_convergenceCount (0) - , m_maxConvergenceCount (0) - , m_minError (1e10) - , m_useMultithreading (_useMultithreading) - , m_doBatchNormalization (_doBatchNormalization) - , fMonitoring (NULL) - { - } + Settings::Settings (TString name, + size_t _convergenceSteps, size_t _batchSize, size_t _testRepetitions, + double _factorWeightDecay, EnumRegularization eRegularization, + MinimizerType _eMinimizerType, double _learningRate, + double _momentum, int _repetitions, bool _useMultithreading) + : m_timer (100, name) + , m_minProgress (0) + , m_maxProgress (100) + , m_convergenceSteps (_convergenceSteps) + , m_batchSize (_batchSize) + , m_testRepetitions (_testRepetitions) + , m_factorWeightDecay (_factorWeightDecay) + , count_E (0) + , count_dE (0) + , count_mb_E (0) + , count_mb_dE (0) + , m_regularization (eRegularization) + , fLearningRate (_learningRate) + , fMomentum (_momentum) + , fRepetitions (_repetitions) + , fMinimizerType (_eMinimizerType) + , m_convergenceCount (0) + , m_maxConvergenceCount (0) + , m_minError (1e10) + , m_useMultithreading (_useMultithreading) + , fMonitoring (NULL) + { + } - Settings::~Settings () - { - } + Settings::~Settings () + { + } @@ -250,260 +253,260 @@ namespace TMVA - /** \brief action to be done when the training cycle is started (e.g. update some monitoring output) - * - */ - void ClassificationSettings::startTrainCycle () - { - if (fMonitoring) + /** \brief action to be done when the training cycle is started (e.g. update some monitoring output) + * + */ + void ClassificationSettings::startTrainCycle () + { + if (fMonitoring) { - create ("ROC", 100, 0, 1, 100, 0, 1); - create ("Significance", 100, 0, 1, 100, 0, 3); - create ("OutputSig", 100, 0, 1); - create ("OutputBkg", 100, 0, 1); - fMonitoring->ProcessEvents (); + create ("ROC", 100, 0, 1, 100, 0, 1); + create ("Significance", 100, 0, 1, 100, 0, 3); + create ("OutputSig", 100, 0, 1); + create ("OutputBkg", 100, 0, 1); + fMonitoring->ProcessEvents (); } - } - - /** \brief action to be done when the training cycle is ended (e.g. 
update some monitoring output) - * - */ - void ClassificationSettings::endTrainCycle (double /*error*/) - { - if (fMonitoring) fMonitoring->ProcessEvents (); - } - - /** \brief action to be done after the computation of a test sample (e.g. update some monitoring output) - * - */ - void ClassificationSettings::testSample (double /*error*/, double output, double target, double weight) - { + } + + /** \brief action to be done when the training cycle is ended (e.g. update some monitoring output) + * + */ + void ClassificationSettings::endTrainCycle (double /*error*/) + { + if (fMonitoring) fMonitoring->ProcessEvents (); + } + + /** \brief action to be done after the computation of a test sample (e.g. update some monitoring output) + * + */ + void ClassificationSettings::testSample (double /*error*/, double output, double target, double weight) + { - m_output.push_back (output); - m_targets.push_back (target); - m_weights.push_back (weight); - } - - - /** \brief action to be done when the test cycle is started (e.g. update some monitoring output) - * - */ - void ClassificationSettings::startTestCycle () - { - m_output.clear (); - m_targets.clear (); - m_weights.clear (); - } - - /** \brief action to be done when the training cycle is ended (e.g. update some monitoring output) - * - */ - void ClassificationSettings::endTestCycle () - { - if (m_output.empty ()) - return; - double minVal = *std::min_element (begin (m_output), end (m_output)); - double maxVal = *std::max_element (begin (m_output), end (m_output)); - const size_t numBinsROC = 1000; - const size_t numBinsData = 100; - - std::vector truePositives (numBinsROC+1, 0); - std::vector falsePositives (numBinsROC+1, 0); - std::vector trueNegatives (numBinsROC+1, 0); - std::vector falseNegatives (numBinsROC+1, 0); - - std::vector x (numBinsData, 0); - std::vector datSig (numBinsData+1, 0); - std::vector datBkg (numBinsData+1, 0); - - double binSizeROC = (maxVal - minVal)/(double)numBinsROC; - double binSizeData = (maxVal - minVal)/(double)numBinsData; - - double sumWeightsSig = 0.0; - double sumWeightsBkg = 0.0; - - for (size_t b = 0; b < numBinsData; ++b) + m_output.push_back (output); + m_targets.push_back (target); + m_weights.push_back (weight); + } + + + /** \brief action to be done when the test cycle is started (e.g. update some monitoring output) + * + */ + void ClassificationSettings::startTestCycle () + { + m_output.clear (); + m_targets.clear (); + m_weights.clear (); + } + + /** \brief action to be done when the training cycle is ended (e.g. 
update some monitoring output) + * + */ + void ClassificationSettings::endTestCycle () + { + if (m_output.empty ()) + return; + double minVal = *std::min_element (begin (m_output), end (m_output)); + double maxVal = *std::max_element (begin (m_output), end (m_output)); + const size_t numBinsROC = 1000; + const size_t numBinsData = 100; + + std::vector truePositives (numBinsROC+1, 0); + std::vector falsePositives (numBinsROC+1, 0); + std::vector trueNegatives (numBinsROC+1, 0); + std::vector falseNegatives (numBinsROC+1, 0); + + std::vector x (numBinsData, 0); + std::vector datSig (numBinsData+1, 0); + std::vector datBkg (numBinsData+1, 0); + + double binSizeROC = (maxVal - minVal)/(double)numBinsROC; + double binSizeData = (maxVal - minVal)/(double)numBinsData; + + double sumWeightsSig = 0.0; + double sumWeightsBkg = 0.0; + + for (size_t b = 0; b < numBinsData; ++b) { - double binData = minVal + b*binSizeData; - x.at (b) = binData; + double binData = minVal + b*binSizeData; + x.at (b) = binData; } - if (fabs(binSizeROC) < 0.0001) - return; + if (fabs(binSizeROC) < 0.0001) + return; - for (size_t i = 0, iEnd = m_output.size (); i < iEnd; ++i) + for (size_t i = 0, iEnd = m_output.size (); i < iEnd; ++i) { - double val = m_output.at (i); - double truth = m_targets.at (i); - double weight = m_weights.at (i); + double val = m_output.at (i); + double truth = m_targets.at (i); + double weight = m_weights.at (i); - bool isSignal = (truth > 0.5 ? true : false); + bool isSignal = (truth > 0.5 ? true : false); - if (m_sumOfSigWeights != 0 && m_sumOfBkgWeights != 0) - { - if (isSignal) + if (m_sumOfSigWeights != 0 && m_sumOfBkgWeights != 0) + { + if (isSignal) weight *= m_sumOfSigWeights; - else + else weight *= m_sumOfBkgWeights; - } - - size_t binROC = (val-minVal)/binSizeROC; - size_t binData = (val-minVal)/binSizeData; - - if (isSignal) - { - for (size_t n = 0; n <= binROC; ++n) - { - truePositives.at (n) += weight; - } - for (size_t n = binROC+1; n < numBinsROC; ++n) - { - falseNegatives.at (n) += weight; - } - - datSig.at (binData) += weight; - sumWeightsSig += weight; - } - else - { - for (size_t n = 0; n <= binROC; ++n) - { - falsePositives.at (n) += weight; - } - for (size_t n = binROC+1; n < numBinsROC; ++n) - { - trueNegatives.at (n) += weight; - } - - datBkg.at (binData) += weight; - sumWeightsBkg += weight; - } + } + + size_t binROC = (val-minVal)/binSizeROC; + size_t binData = (val-minVal)/binSizeData; + + if (isSignal) + { + for (size_t n = 0; n <= binROC; ++n) + { + truePositives.at (n) += weight; + } + for (size_t n = binROC+1; n < numBinsROC; ++n) + { + falseNegatives.at (n) += weight; + } + + datSig.at (binData) += weight; + sumWeightsSig += weight; + } + else + { + for (size_t n = 0; n <= binROC; ++n) + { + falsePositives.at (n) += weight; + } + for (size_t n = binROC+1; n < numBinsROC; ++n) + { + trueNegatives.at (n) += weight; + } + + datBkg.at (binData) += weight; + sumWeightsBkg += weight; + } } - std::vector sigEff; - std::vector backRej; + std::vector sigEff; + std::vector backRej; - double bestSignificance = 0; - double bestCutSignificance = 0; + double bestSignificance = 0; + double bestCutSignificance = 0; - double numEventsScaleFactor = 1.0; - if (m_scaleToNumEvents > 0) + double numEventsScaleFactor = 1.0; + if (m_scaleToNumEvents > 0) { - size_t numEvents = m_output.size (); - numEventsScaleFactor = double (m_scaleToNumEvents)/double (numEvents); + size_t numEvents = m_output.size (); + numEventsScaleFactor = double (m_scaleToNumEvents)/double (numEvents); } - clear 
("ROC"); - clear ("Significance"); + clear ("ROC"); + clear ("Significance"); - for (size_t i = 0; i < numBinsROC; ++i) + for (size_t i = 0; i < numBinsROC; ++i) { - double tp = truePositives.at (i) * numEventsScaleFactor; - double fp = falsePositives.at (i) * numEventsScaleFactor; - double tn = trueNegatives.at (i) * numEventsScaleFactor; - double fn = falseNegatives.at (i) * numEventsScaleFactor; + double tp = truePositives.at (i) * numEventsScaleFactor; + double fp = falsePositives.at (i) * numEventsScaleFactor; + double tn = trueNegatives.at (i) * numEventsScaleFactor; + double fn = falseNegatives.at (i) * numEventsScaleFactor; - double seff = (tp+fn == 0.0 ? 1.0 : (tp / (tp+fn))); - double brej = (tn+fp == 0.0 ? 0.0 : (tn / (tn+fp))); + double seff = (tp+fn == 0.0 ? 1.0 : (tp / (tp+fn))); + double brej = (tn+fp == 0.0 ? 0.0 : (tn / (tn+fp))); - sigEff.push_back (seff); - backRej.push_back (brej); + sigEff.push_back (seff); + backRej.push_back (brej); - // m_histROC->Fill (seff, brej); - addPoint ("ROC", seff, brej); // x, y + // m_histROC->Fill (seff, brej); + addPoint ("ROC", seff, brej); // x, y - double currentCut = (i * binSizeROC)+minVal; + double currentCut = (i * binSizeROC)+minVal; - double sig = tp; - double bkg = fp; - double significance = sig / sqrt (sig + bkg); - if (significance > bestSignificance) - { - bestSignificance = significance; - bestCutSignificance = currentCut; - } + double sig = tp; + double bkg = fp; + double significance = sig / sqrt (sig + bkg); + if (significance > bestSignificance) + { + bestSignificance = significance; + bestCutSignificance = currentCut; + } - addPoint ("Significance", currentCut, significance); - // m_histSignificance->Fill (currentCut, significance); + addPoint ("Significance", currentCut, significance); + // m_histSignificance->Fill (currentCut, significance); } - m_significances.push_back (bestSignificance); - static size_t testCycle = 0; + m_significances.push_back (bestSignificance); + static size_t testCycle = 0; - clear ("OutputSig"); - clear ("OutputBkg"); - for (size_t i = 0; i < numBinsData; ++i) + clear ("OutputSig"); + clear ("OutputBkg"); + for (size_t i = 0; i < numBinsData; ++i) { - addPoint ("OutputSig", x.at (i), datSig.at (i)/sumWeightsSig); - addPoint ("OutputBkg", x.at (i), datBkg.at (i)/sumWeightsBkg); - // m_histOutputSignal->Fill (x.at (i), datSig.at (1)/sumWeightsSig); - // m_histOutputBackground->Fill (x.at (i), datBkg.at (1)/sumWeightsBkg); + addPoint ("OutputSig", x.at (i), datSig.at (i)/sumWeightsSig); + addPoint ("OutputBkg", x.at (i), datBkg.at (i)/sumWeightsBkg); + // m_histOutputSignal->Fill (x.at (i), datSig.at (1)/sumWeightsSig); + // m_histOutputBackground->Fill (x.at (i), datBkg.at (1)/sumWeightsBkg); } - ++testCycle; + ++testCycle; - if (fMonitoring) + if (fMonitoring) { - plot ("ROC", "", 2, kRed); - plot ("Significance", "", 3, kRed); - plot ("OutputSig", "", 4, kRed); - plot ("OutputBkg", "same", 4, kBlue); - fMonitoring->ProcessEvents (); + plot ("ROC", "", 2, kRed); + plot ("Significance", "", 3, kRed); + plot ("OutputSig", "", 4, kRed); + plot ("OutputBkg", "same", 4, kBlue); + fMonitoring->ProcessEvents (); } - m_cutValue = bestCutSignificance; - } + m_cutValue = bestCutSignificance; + } - /** \brief check for convergence - * - */ - bool Settings::hasConverged (double testError) - { - // std::cout << "check convergence; minError " << m_minError << " current " << testError - // << " current convergence count " << m_convergenceCount << std::endl; - if (testError < m_minError*0.999) + /** \brief 
check for convergence + * + */ + bool Settings::hasConverged (double testError) + { + // std::cout << "check convergence; minError " << m_minError << " current " << testError + // << " current convergence count " << m_convergenceCount << std::endl; + if (testError < m_minError*0.999) { - m_convergenceCount = 0; - m_minError = testError; + m_convergenceCount = 0; + m_minError = testError; } - else + else { - ++m_convergenceCount; - m_maxConvergenceCount = std::max (m_convergenceCount, m_maxConvergenceCount); + ++m_convergenceCount; + m_maxConvergenceCount = std::max (m_convergenceCount, m_maxConvergenceCount); } - if (m_convergenceCount >= convergenceSteps () || testError <= 0) - return true; + if (m_convergenceCount >= convergenceSteps () || testError <= 0) + return true; - return false; - } + return false; + } - /** \brief set the weight sums to be scaled to (preparations for monitoring output) - * - */ - void ClassificationSettings::setWeightSums (double sumOfSigWeights, double sumOfBkgWeights) - { - m_sumOfSigWeights = sumOfSigWeights; m_sumOfBkgWeights = sumOfBkgWeights; - } + /** \brief set the weight sums to be scaled to (preparations for monitoring output) + * + */ + void ClassificationSettings::setWeightSums (double sumOfSigWeights, double sumOfBkgWeights) + { + m_sumOfSigWeights = sumOfSigWeights; m_sumOfBkgWeights = sumOfBkgWeights; + } - /** \brief preparation for monitoring output - * - */ - void ClassificationSettings::setResultComputation ( - std::string _fileNameNetConfig, - std::string _fileNameResult, - std::vector* _resultPatternContainer) - { - m_pResultPatternContainer = _resultPatternContainer; - m_fileNameResult = _fileNameResult; - m_fileNameNetConfig = _fileNameNetConfig; - } + /** \brief preparation for monitoring output + * + */ + void ClassificationSettings::setResultComputation ( + std::string _fileNameNetConfig, + std::string _fileNameResult, + std::vector* _resultPatternContainer) + { + m_pResultPatternContainer = _resultPatternContainer; + m_fileNameResult = _fileNameResult; + m_fileNameNetConfig = _fileNameNetConfig; + } @@ -512,39 +515,51 @@ namespace TMVA - /** \brief compute the number of weights given the size of the input layer - * - */ - size_t Net::numWeights (size_t trainingStartLayer) const - { - size_t num (0); - size_t index (0); - size_t prevNodes (inputSize ()); - for (auto& layer : m_layers) + /** \brief compute the number of weights given the size of the input layer + * + */ + size_t Net::numWeights (size_t trainingStartLayer) const + { + size_t num (0); + size_t index (0); + size_t prevNodes (inputSize ()); + for (auto& layer : m_layers) { - if (index >= trainingStartLayer) - num += layer.numWeights (prevNodes); - prevNodes = layer.numNodes (); - ++index; + if (index >= trainingStartLayer) + num += layer.numWeights (prevNodes); + prevNodes = layer.numNodes (); + ++index; } - return num; - } + return num; + } + size_t Net::numNodes (size_t trainingStartLayer) const + { + size_t num (0); + size_t index (0); + for (auto& layer : m_layers) + { + if (index >= trainingStartLayer) + num += layer.numNodes (); + ++index; + } + return num; + } - /** \brief prepare the drop-out container given the provided drop-fractions - * - */ - void Net::fillDropContainer (DropContainer& dropContainer, double dropFraction, size_t numNodes) const - { - size_t numDrops = dropFraction * numNodes; - if (numDrops >= numNodes) // maintain at least one node - numDrops = numNodes - 1; - dropContainer.insert (end (dropContainer), numNodes-numDrops, true); // add the markers 
for the nodes which are enabled - dropContainer.insert (end (dropContainer), numDrops, false); // add the markers for the disabled nodes - // shuffle - std::random_shuffle (end (dropContainer)-numNodes, end (dropContainer)); // shuffle enabled and disabled markers - } + /** \brief prepare the drop-out container given the provided drop-fractions + * + */ + void Net::fillDropContainer (DropContainer& dropContainer, double dropFraction, size_t _numNodes) const + { + size_t numDrops = dropFraction * _numNodes; + if (numDrops >= _numNodes) // maintain at least one node + numDrops = _numNodes - 1; + dropContainer.insert (end (dropContainer), _numNodes-numDrops, true); // add the markers for the nodes which are enabled + dropContainer.insert (end (dropContainer), numDrops, false); // add the markers for the disabled nodes + // shuffle + std::random_shuffle (end (dropContainer)-_numNodes, end (dropContainer)); // shuffle enabled and disabled markers + } @@ -553,6 +568,6 @@ namespace TMVA - }; // namespace DNN + }; // namespace DNN }; // namespace TMVA From 4f5b9193305444abdad0bd85a8358b4085c934ac Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Thu, 25 Aug 2016 15:44:27 +0530 Subject: [PATCH 27/42] Successful compile --- tmva/tmva/src/MethodDNN.cxx | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tmva/tmva/src/MethodDNN.cxx b/tmva/tmva/src/MethodDNN.cxx index 67525af96fd50..c7597a179f5c4 100644 --- a/tmva/tmva/src/MethodDNN.cxx +++ b/tmva/tmva/src/MethodDNN.cxx @@ -786,7 +786,7 @@ void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const void* weightsxml = gTools().xmlengine().NewChild(nn, 0, "Synapses"); gTools().xmlengine().NewAttr (weightsxml, 0, "InputSize", gTools().StringFromInt((int)fNet.inputSize ())); gTools().xmlengine().NewAttr (weightsxml, 0, "OutputSize", gTools().StringFromInt((int)fNet.outputSize ())); - gTools().xmlengine().NewAttr (weightsxml, 0, "NumberSynapses", gTools().StringFromInt(((int)fWeightBucket.size ()) * TMVA::DNN::BUCKET_SIZE); + gTools().xmlengine().NewAttr (weightsxml, 0, "NumberSynapses", gTools().StringFromInt(((int)fWeightBucket.size ()) * (TMVA::DNN::BUCKET_SIZE))); std::stringstream s(""); s.precision( 16 ); for (std::vector::const_iterator it = fWeightBucket.begin (), itEnd = fWeightBucket.end (); it != itEnd; ++it) @@ -856,7 +856,7 @@ void TMVA::MethodDNN::ReadWeightsFromXML( void* wghtnode ) const char* content = gTools().GetContent (xmlWeights); std::stringstream sstr (content); - for (Int_t iWeight = 0; iWeight< (numWeights / TMVA::DNN::BUCKET_SIZE; ++iWeight) + for (Int_t iWeight = 0; iWeight< (numWeights / (TMVA::DNN::BUCKET_SIZE)); ++iWeight) { // synapses Double_t weight; sstr >> weight; @@ -1100,7 +1100,7 @@ void TMVA::MethodDNN::checkGradients () // net.setErrorFunction (ModeErrorFunction::SUMOFSQUARES); size_t numWeights = fNet.numWeights (inputSize); - std::vector weights (numWeights); + std::vector weightBucket (numWeights / TMVA::DNN::BUCKET_SIZE); //weights.at (0) = 1000213.2; std::vector pattern; @@ -1128,26 +1128,26 @@ void TMVA::MethodDNN::checkGradients () size_t largeDifferences = 0; for (size_t iTest = 0; iTest < 1000; ++iTest) { - TMVA::DNN::uniformDouble (weights, 0.7); - std::vector gradients (numWeights, 0); + TMVA::DNN::uniformDouble (weightBucket, 0.7); + std::vector gradientBucket (numWeights / TMVA::DNN::BUCKET_SIZE, 0); DNN::Batch batch (begin (pattern), end (pattern)); DNN::DropContainer dropContainer; std::tuple settingsAndBatch (settings, batch, dropContainer); - double E = 
fNet (settingsAndBatch, weights, gradients); - std::vector changedWeights; - changedWeights.assign (weights.begin (), weights.end ()); + double E = fNet (settingsAndBatch, weightBucket, gradientBucket); + std::vector changedWeightBucket; + changedWeightBucket.assign (weightBucket.begin (), weightBucket.end ()); - int changeWeightPosition = TMVA::DNN::randomInt (numWeights); - double dEdw = gradients.at (changeWeightPosition); + int changeWeightPosition = TMVA::DNN::randomInt (numWeights / TMVA::DNN::BUCKET_SIZE); + double dEdw = gradientBucket.at (changeWeightPosition); while (dEdw == 0.0) { - changeWeightPosition = TMVA::DNN::randomInt (numWeights); - dEdw = gradients.at (changeWeightPosition); + changeWeightPosition = TMVA::DNN::randomInt (numWeights / TMVA::DNN::BUCKET_SIZE); + dEdw = gradientBucket.at (changeWeightPosition); } const double gamma = 0.01; double delta = gamma*dEdw; - changedWeights.at (changeWeightPosition) += delta; + changedWeightBucket.at (changeWeightPosition) += delta; if (dEdw == 0.0) { std::cout << "dEdw == 0.0 "; @@ -1155,7 +1155,7 @@ void TMVA::MethodDNN::checkGradients () } assert (dEdw != 0.0); - double Echanged = fNet (settingsAndBatch, changedWeights); + double Echanged = fNet (settingsAndBatch, changedWeightBucket); // double difference = fabs((E-Echanged) - delta*dEdw); double difference = fabs ((E+delta - Echanged)/E); @@ -1184,7 +1184,7 @@ void TMVA::MethodDNN::checkGradients () } else { - // for_each (begin (weights), end (weights), [](double w){ std::cout << w << ", "; }); + // for_each (begin (weightBucket), end (weightBucket), [](double w){ std::cout << w << ", "; }); // std::cout << std::endl; // assert (isOk); } From 96b29df19cd99698bbe8809befe564687db3020d Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Thu, 25 Aug 2016 21:01:15 +0530 Subject: [PATCH 28/42] Successful build NeuralNet.h --- tmva/tmva/inc/TMVA/NeuralNet.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index d022522b5ec70..5122696f96938 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -52,6 +52,7 @@ #include // turn on or off exceptions for NaN and other numeric exceptions +#include namespace TMVA { @@ -62,6 +63,17 @@ namespace TMVA // double gaussDoubl (edouble mean, double sigma); const int BUCKET_SIZE = 8; // ------------------------------- Declare Bucket Size -------------------------------------------- + /*! 
\brief Hash initialization + * + * + */ + // std::hash hasherFunction; + + + int hasherFunction(int a); + + // --------------------------------------------------------------------------------- + double gaussDouble (double mean, double sigma); double uniformDouble (double minValue, double maxValue); @@ -407,7 +419,7 @@ namespace TMVA - + template double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization); @@ -722,7 +734,7 @@ namespace TMVA template - void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double weightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket); + void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket); @@ -1159,8 +1171,8 @@ namespace TMVA size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net size_t numNodes (size_t trainingStartLayer = 0) const; ///< returns the number of nodes in this net - - std::vector compute (const std::vector& input, std::vector& weightBucket) const; ///< compute the net with the given input and the given weights + template + std::vector compute (const std::vector& input, Weights& weightBucket) const; ///< compute the net with the given input and the given weights template double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients @@ -1202,10 +1214,10 @@ namespace TMVA void fetchOutput (const std::vector& layerPatternData, OutputContainer& outputContainer) const; - + template std::tuple computeError (const Settings& settings, std::vector& lastLayerData, - Batch& batch, std::vector& weightBucket) const; + Batch& batch, Weights& weightBucket) const; template void backPropagate (std::vector>& layerPatternData, std::vector& weightBucket, std::vector& gradientBucket, From f301784f4e93adb04eef7d169e183a3c2397138c Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Thu, 25 Aug 2016 21:02:01 +0530 Subject: [PATCH 29/42] Successful build NeuralNet.icc --- tmva/tmva/inc/TMVA/NeuralNet.icc | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index 265b923339527..5527f4f225676 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -8,21 +8,13 @@ #include "Math/Util.h" -#include + namespace TMVA { namespace DNN { - /*! \brief Hash initialization - * - * - */ - std::hash hasherFunction; - - // --------------------------------------------------------------------------------- - @@ -232,7 +224,7 @@ template ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, int itGradient, std::vector& gradientBucket, - int itWeight, std::vector& weightBucket, double& weightDecay) + int itWeight, std::vector& weightBucket, double& factorWeightDecay) { // ! 
the factor weightDecay has to be already scaled by 1/n where n is the number of weights while (itSource != itSourceEnd) @@ -241,7 +233,7 @@ template auto itTargetGradient = itTargetGradientBegin; while (itTargetDelta != itTargetDeltaEnd) { - (gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization(weightBucket[hasherFunction(itWeight) % BUCKET_SIZE],weightDecay); + (gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization(weightBucket[hasherFunction(itWeight) % BUCKET_SIZE],factorWeightDecay); ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight; } ++itSource; @@ -485,7 +477,7 @@ template * * */ - + template double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization) { if (eRegularization == EnumRegularization::L1) @@ -1017,8 +1009,8 @@ template * \param input the input data * \param weights the weight data */ - - std::vector Net::compute (const std::vector& input, std::vector& weightBucket) const + template + std::vector Net::compute (const std::vector& input, Weights& weightBucket) const { std::vector layerData; layerData.reserve (m_layers.size ()+1); @@ -1309,10 +1301,10 @@ template - + template std::tuple Net::computeError (const Settings& settings, std::vector& lastLayerData, - Batch& batch, std::vector& weightBucket) const + Batch& batch, Weights& weightBucket) const { typename std::vector::iterator itLayerData = lastLayerData.begin (); From 9f1dce56790d7a5a32a3cf58e97ced067da45768 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Thu, 25 Aug 2016 21:02:58 +0530 Subject: [PATCH 30/42] Successful build NeuralNet.cxx --- tmva/tmva/src/NeuralNet.cxx | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tmva/tmva/src/NeuralNet.cxx b/tmva/tmva/src/NeuralNet.cxx index cc1f221959aab..a66beab126ec8 100644 --- a/tmva/tmva/src/NeuralNet.cxx +++ b/tmva/tmva/src/NeuralNet.cxx @@ -8,6 +8,17 @@ namespace TMVA namespace DNN { + int hasherFunction(int a) + { + a = (a+0x7ed55d16) + (a<<12); + a = (a^0xc761c23c) ^ (a>>19); + a = (a+0x165667b1) + (a<<5); + a = (a+0xd3a2646c) ^ (a<<9); + a = (a+0xfd7046c5) + (a<<3); + a = (a^0xb55a4f09) ^ (a>>16); + return a; + } + double gaussDouble (double mean, double sigma) From f5e69437a1c238bfc72eaf74e1e05fc27a494e85 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Fri, 26 Aug 2016 18:17:27 +0530 Subject: [PATCH 31/42] Update NeuralNet.icc --- tmva/tmva/inc/TMVA/NeuralNet.icc | 104 +++++++++++++++++++------------ 1 file changed, 63 insertions(+), 41 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index 5527f4f225676..4f219b9a65981 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -8,6 +8,7 @@ #include "Math/Util.h" +#include namespace TMVA @@ -253,9 +254,8 @@ template * Can be used with multithreading (i.e. "HogWild!" 
style); see call in trainCycle */ template - double Steepest::operator() (Function& fitnessFunction, std::vector& weightBucket, PassThrough& passThrough) + double Steepest::operator() (Function& fitnessFunction, std::vector& weightBucket, PassThrough& passThrough, const size_t& numWeights, std::vector& layerWeightNumber) { - size_t numWeights = weightBucket.size () * BUCKET_SIZE; // std::vector gradients (numWeights, 0.0); std::vector gradientBucket (weightBucket.size (), 0.0); std::vector localWeightBucket (begin (weightBucket), end (weightBucket)); @@ -279,12 +279,18 @@ template // --- nesterov momentum --- // apply momentum before computing the new gradient int itPrevG = 0; - int itPrevGEnd = numWeights - 1; + int itPrevGEnd = numWeights; int itLocWeight = 0; - for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight) + int itLWN, layerNumber = 0; + + for (auto itLayerWeightNumber = layerWeightNumber.begin(); itLayerWeightNumber != layerWeightNumber.end(); ++itLayerWeightNumber, ++layerNumber) { - (m_prevGradientBucket[hasherFunction(itPrevG) % BUCKET_SIZE]) *= m_beta; - (localWeightBucket[hasherFunction(itLocWeight) % BUCKET_SIZE]) += (m_prevGradientBucket[hasherFunction(itPrevG) % BUCKET_SIZE]); + for(itLWN = 0; itLWN < *itLayerWeightNumber; ++itLWN) + { + (m_prevGradientBucket[(hasherFunction(itPrevG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) *= m_beta; + (localWeightBucket[(hasherFunction(itLocWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) += (m_prevGradientBucket[(hasherFunction(itPrevG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]); + ++itPrevG; ++itLocWeight; + } } E = fitnessFunction (passThrough, localWeightBucket, gradientBucket); // ************************** @@ -295,22 +301,28 @@ template // double alpha = m_alpha; int itG = 0; - int itGEnd = numWeights - 1; + int itGEnd = numWeights; itPrevG = 0; double maxGrad = 0.0; - for (; itG != itGEnd; ++itG, ++itPrevG) + layerNumber = 0; + + for (auto itLayerWeightNumber = layerWeightNumber.begin(); itLayerWeightNumber != layerWeightNumber.end(); ++itLayerWeightNumber, ++layerNumber) { - double currGrad = (gradientBucket[hasherFunction(itG) % BUCKET_SIZE]); - double prevGrad = (m_prevGradientBucket[hasherFunction(itPrevG) % BUCKET_SIZE]); - currGrad *= alpha; - - //(*itPrevG) = m_beta * (prevGrad + currGrad); - currGrad += prevGrad; - (gradientBucket[hasherFunction(itG) % BUCKET_SIZE]) = currGrad; - (m_prevGradientBucket[hasherFunction(itPrevG) % BUCKET_SIZE]) = currGrad; + for(itLWN = 0; itLWN < *itLayerWeightNumber; ++itLWN) + { + double currGrad = (gradientBucket[(hasherFunction(itG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]); + double prevGrad = (m_prevGradientBucket[(hasherFunction(itPrevG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]); + currGrad *= alpha; - if (std::fabs (currGrad) > maxGrad) - maxGrad = currGrad; + //(*itPrevG) = m_beta * (prevGrad + currGrad); + currGrad += prevGrad; + (gradientBucket[(hasherFunction(itG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) = currGrad; + (m_prevGradientBucket[(hasherFunction(itPrevG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) = currGrad; + + if (std::fabs (currGrad) > maxGrad) + maxGrad = currGrad; + ++itG; ++itPrevG; + } } if (maxGrad > 1) @@ -648,7 +660,7 @@ template return; int itWeight = 0; - int itWeightEnd = weightBucket.size() * BUCKET_SIZE; + int itWeightEnd = (int) numWeights (); auto itDrop = std::begin (drops); auto itDropEnd = std::end (drops); size_t numNodesPrev = inputSize (); @@ -700,7 +712,7 @@ template * \param settings the settings for the 
training (e.g. multithreading or not, regularization etc.) */ template - double Net::train (std::vector& weightBucket, + double Net::train (std::vector& weightBucket, std::vector& layerWeightNumber, std::vector& trainPattern, const std::vector& testPattern, Minimizer& minimizer, @@ -755,7 +767,7 @@ template } // execute training cycle - trainError = trainCycle (minimizer, weightBucket, begin (trainPattern), end (trainPattern), settings, dropContainer); + trainError = trainCycle (minimizer, weightBucket, layerWeightNumber, begin (trainPattern), end (trainPattern), settings, dropContainer); // ------ check if we have to execute a test ------------------ @@ -913,7 +925,7 @@ template * \param dropContainer the data for dropping-out nodes (regularization technique) */ template - inline double Net::trainCycle (Minimizer& minimizer, std::vector& weightBucket, + double Net::trainCycle (Minimizer& minimizer, std::vector& weightBucket, std::vector& layerWeightNumber, Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer) { double error = 0.0; @@ -974,7 +986,7 @@ template { Batch& batch = *it; pass_through_type settingsAndBatch (settings, batch, dropContainer); - localError += minimizer ((*this), weightBucket, settingsAndBatch); /// call the minimizer + localError += minimizer ((*this), weightBucket, settingsAndBatch, numWeights (), layerWeightNumber); /// call the minimizer } return localError; }) @@ -988,8 +1000,8 @@ template { for (auto& batch : batches) { - std::tuple settingsAndBatch (settings, batch, dropContainer); - error += minimizer ((*this), weightBucket, settingsAndBatch); + pass_through_type settingsAndBatch (settings, batch, dropContainer); + error += minimizer ((*this), weightBucket, settingsAndBatch, numWeights (), layerWeightNumber); } } @@ -1046,8 +1058,8 @@ template double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket) const { std::vector nothing; // empty gradients; no backpropagation is done, just forward - assert (numWeights () == (weightBucket.size() * BUCKET_SIZE)); - double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, 0, 10000, nothing, false, weightBucket, nothing); + // assert (numWeights () == (weightBucket.size() * BUCKET_SIZE)); + double error = forward_backward(m_layers, settingsAndBatch, 0, numWeights () - 1, 0, 0, 10000, nothing, false, weightBucket, nothing); return error; } @@ -1055,8 +1067,8 @@ template double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, ModeOutput /*eFetch*/, OutContainer& outputContainer) const { std::vector nothing; // empty gradients; no backpropagation is done, just forward - assert (numWeights () == (weightBucket.size () * BUCKET_SIZE)); - double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, 0, 10000, outputContainer, true, weightBucket, nothing); + // assert (numWeights () == (weightBucket.size () * BUCKET_SIZE)); + double error = forward_backward(m_layers, settingsAndBatch, 0, numWeights () - 1, 0, 0, 10000, outputContainer, true, weightBucket, nothing); return error; } @@ -1065,9 +1077,11 @@ template double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket) const { std::vector nothing; - assert (numWeights () == (weightBucket.size () * BUCKET_SIZE)); - assert ((weightBucket.size () * BUCKET_SIZE) == (gradientBucket.size () * BUCKET_SIZE)); - double error = 
forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, (gradientBucket.size () * BUCKET_SIZE) - 1, 0, nothing, false, weightBucket, gradientBucket); + // std::cout<<"\nnumWeights = "<minimizerType () == TMVA::DNN::MinimizerType::fSteepest) { DNN::Steepest minimizer (ptrSettings->learningRate (), ptrSettings->momentum (), ptrSettings->repetitions ()); - /*E =*/fNet.train (fWeightBucket, trainPattern, testPattern, minimizer, *ptrSettings.get ()); + /*E =*/fNet.train (fWeightBucket, layerWeightNumber, trainPattern, testPattern, minimizer, *ptrSettings.get ()); } ptrSettings.reset (); Log () << kINFO << Endl; @@ -786,7 +788,7 @@ void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const void* weightsxml = gTools().xmlengine().NewChild(nn, 0, "Synapses"); gTools().xmlengine().NewAttr (weightsxml, 0, "InputSize", gTools().StringFromInt((int)fNet.inputSize ())); gTools().xmlengine().NewAttr (weightsxml, 0, "OutputSize", gTools().StringFromInt((int)fNet.outputSize ())); - gTools().xmlengine().NewAttr (weightsxml, 0, "NumberSynapses", gTools().StringFromInt(((int)fWeightBucket.size ()) * (TMVA::DNN::BUCKET_SIZE))); + gTools().xmlengine().NewAttr (weightsxml, 0, "NumberSynapses", gTools().StringFromInt((int)fNet.numWeights ())); std::stringstream s(""); s.precision( 16 ); for (std::vector::const_iterator it = fWeightBucket.begin (), itEnd = fWeightBucket.end (); it != itEnd; ++it) @@ -856,7 +858,7 @@ void TMVA::MethodDNN::ReadWeightsFromXML( void* wghtnode ) const char* content = gTools().GetContent (xmlWeights); std::stringstream sstr (content); - for (Int_t iWeight = 0; iWeight< (numWeights / (TMVA::DNN::BUCKET_SIZE)); ++iWeight) + for (Int_t iWeight = 0; iWeight< numWeights; ++iWeight) // ************* { // synapses Double_t weight; sstr >> weight; From f62fcf21cb5ff451d60b08e2998b7fed74ffee28 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Fri, 26 Aug 2016 18:18:39 +0530 Subject: [PATCH 34/42] Update NeuralNet.cxx --- tmva/tmva/src/NeuralNet.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tmva/tmva/src/NeuralNet.cxx b/tmva/tmva/src/NeuralNet.cxx index a66beab126ec8..0fccdb5a87a44 100644 --- a/tmva/tmva/src/NeuralNet.cxx +++ b/tmva/tmva/src/NeuralNet.cxx @@ -1,7 +1,7 @@ #include "TMVA/NeuralNet.h" - +#include namespace TMVA { @@ -16,7 +16,7 @@ namespace TMVA a = (a+0xd3a2646c) ^ (a<<9); a = (a+0xfd7046c5) + (a<<3); a = (a^0xb55a4f09) ^ (a>>16); - return a; + return std::abs(a); } From e48d24f443799e86be22da2bab29385fbc1954da Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Sun, 28 Aug 2016 03:00:29 +0530 Subject: [PATCH 35/42] Made some logical changes in HashedNets --- tmva/tmva/inc/TMVA/NeuralNet.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index 210b5a8fc6cd7..b0336f8416598 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -268,12 +268,12 @@ namespace TMVA template - void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, int itWeight, std::vector& weightBucket, ItTarget itTargetBegin, ItTarget itTargetEnd); + void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, int itWeight, std::vector& weightBucket, size_t layerNumber, ItTarget itTargetBegin, ItTarget itTargetEnd); - template - void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop); + template + void 
applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, size_t layerNumber, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop); @@ -292,7 +292,7 @@ namespace TMVA void update (ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, - int itGradient, std::vector& gradientBucket); + int itGradient, std::vector& gradientBucket, size_t layerNumber); @@ -301,7 +301,7 @@ namespace TMVA ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, int itGradient, std::vector& gradientBucket, - int itWeight, std::vector& weightBucket, double& weightDecay); + int itWeight, std::vector& weightBucket, double& factorWeightDecay, size_t layerNumber); @@ -420,7 +420,7 @@ namespace TMVA template - double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization); + double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization, size_t layerNumber); @@ -726,15 +726,15 @@ namespace TMVA template - void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket); + void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber); template - void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket); + void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber); template - void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket); + void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket, size_t layerNumber); @@ -1255,7 +1255,7 @@ namespace TMVA double patternWeight, std::vector& weightBucket, double factorWeightDecay, - EnumRegularization eRegularization) const; + EnumRegularization eRegularization, size_t layerNumber) const; const std::vector& layers () const { return m_layers; } ///< returns the layers (structure) From 2bf3295db8f6e6a43dfec7207df9e1b032d681b5 Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Sun, 28 Aug 2016 03:01:18 +0530 Subject: [PATCH 36/42] Made some logical changes in HashedNets --- tmva/tmva/inc/TMVA/NeuralNet.icc | 84 ++++++++++++++++---------------- 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index 4f219b9a65981..f58cb8cbcea41 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -79,7 +79,7 @@ namespace TMVA */ template void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, - int itWeight, std::vector& weightBucket, + int itWeight, std::vector& weightBucket, size_t layerNumber, ItTarget itTargetBegin, ItTarget itTargetEnd, ItDrop itDrop) { @@ -88,7 +88,7 @@ template - void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop) + void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, 
size_t layerNumber, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop) { for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev) { for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr) { if (!HasDropOut || *itDrop) - (*itPrev) += (*itCurr) * (weightBucket[hasherFunction(itWeight) % BUCKET_SIZE]); + (*itPrev) += (*itCurr) * (weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]); ++itWeight; } if (HasDropOut) ++itDrop; @@ -170,7 +170,7 @@ template void update (ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, - int itGradient, std::vector& gradientBucket) + int itGradient, std::vector& gradientBucket, size_t layerNumber) { while (itSource != itSourceEnd) { @@ -178,7 +178,7 @@ template auto itTargetGradient = itTargetGradientBegin; while (itTargetDelta != itTargetDeltaEnd) { - (gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient); + (gradientBucket[(hasherFunction(itGradient) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient); ++itTargetDelta; ++itTargetGradient; ++itGradient; } ++itSource; @@ -225,7 +225,7 @@ template ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, int itGradient, std::vector& gradientBucket, - int itWeight, std::vector& weightBucket, double& factorWeightDecay) + int itWeight, std::vector& weightBucket, double& factorWeightDecay, size_t layerNumber) { // ! the factor weightDecay has to be already scaled by 1/n where n is the number of weights while (itSource != itSourceEnd) @@ -234,7 +234,7 @@ template auto itTargetGradient = itTargetGradientBegin; while (itTargetDelta != itTargetDeltaEnd) { - (gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization(weightBucket[hasherFunction(itWeight) % BUCKET_SIZE],factorWeightDecay); + (gradientBucket[(hasherFunction(itGradient) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization(weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)],factorWeightDecay); ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight; } ++itSource; @@ -490,7 +490,7 @@ template * */ template - double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization) + double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization, size_t layerNumber) { if (eRegularization == EnumRegularization::L1) { @@ -500,7 +500,7 @@ template int itWeight; for (itWeight = currLayerWeightIndex; itWeight != nextLayerWeightIndex; ++itWeight, ++n) { - double weight = (weightBucket[hasherFunction(itWeight) % BUCKET_SIZE]); + double weight = (weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]); w += std::fabs (weight); } return error + 0.5 * w * factorWeightDecay / n; @@ -513,7 +513,7 @@ template int itWeight; for (itWeight = currLayerWeightIndex; itWeight != nextLayerWeightIndex; ++itWeight, ++n) { - double weight = (weightBucket[hasherFunction(itWeight) % BUCKET_SIZE]); + double weight = (weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * 
BUCKET_SIZE)]); w += weight*weight; } return error + 0.5 * w * factorWeightDecay / n; @@ -540,12 +540,12 @@ template * */ template - void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket) + void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber) { if (prevLayerData.hasDropOut ()) { applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.weightsBegin (), weightBucket, + currLayerData.weightsBegin (), weightBucket, layerNumber, currLayerData.valuesBegin (), currLayerData.valuesEnd (), prevLayerData.dropOut ()); } @@ -553,7 +553,7 @@ template { bool dummy = true; applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.weightsBegin (), weightBucket, + currLayerData.weightsBegin (), weightBucket, layerNumber, currLayerData.valuesBegin (), currLayerData.valuesEnd (), &dummy); // dummy to turn on all nodes (no drop out) } @@ -566,12 +566,12 @@ template * */ template - void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket) + void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber) { if (prevLayerData.hasDropOut ()) { applyWeightsBackwards (currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.weightsBegin (), weightBucket, + currLayerData.weightsBegin (), weightBucket, layerNumber, prevLayerData.deltasBegin (), prevLayerData.deltasEnd (), prevLayerData.dropOut ()); } @@ -579,7 +579,7 @@ template { bool dummy = true; applyWeightsBackwards (currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.weightsBegin (), weightBucket, + currLayerData.weightsBegin (), weightBucket, layerNumber, prevLayerData.deltasBegin (), prevLayerData.deltasEnd (), &dummy); // dummy to use all nodes (no drop out) } @@ -594,7 +594,7 @@ template * */ template - void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket) + void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket, size_t layerNumber) { // ! 
the "factorWeightDecay" has already to be scaled by 1/n where n is the number of weights if (factorWeightDecay != 0.0) // has weight regularization @@ -605,7 +605,7 @@ template currLayerData.deltasEnd (), currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (), gradientBucket, - currLayerData.weightsBegin (), weightBucket, factorWeightDecay); + currLayerData.weightsBegin (), weightBucket, factorWeightDecay, layerNumber); } else if (regularization == EnumRegularization::L2) // L2 regularization ( sum(w^2) ) { @@ -614,14 +614,14 @@ template currLayerData.deltasEnd (), currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (), gradientBucket, - currLayerData.weightsBegin (), weightBucket, factorWeightDecay); + currLayerData.weightsBegin (), weightBucket, factorWeightDecay, layerNumber); } else { update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), currLayerData.deltasBegin (), currLayerData.deltasEnd (), currLayerData.valueGradientsBegin (), - currLayerData.gradientsBegin (), gradientBucket); + currLayerData.gradientsBegin (), gradientBucket, layerNumber); } else @@ -629,7 +629,7 @@ template update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), currLayerData.deltasBegin (), currLayerData.deltasEnd (), currLayerData.valueGradientsBegin (), - currLayerData.gradientsBegin (), gradientBucket); + currLayerData.gradientsBegin (), gradientBucket, layerNumber); } } @@ -659,20 +659,22 @@ template if (drops.empty () || weightBucket.empty ()) return; - int itWeight = 0; - int itWeightEnd = (int) numWeights (); + int itWeightBucket = 0; + int itWeightBucketEnd = (int) weightBucket.size(); auto itDrop = std::begin (drops); auto itDropEnd = std::end (drops); - size_t numNodesPrev = inputSize (); + // size_t numNodesPrev = inputSize (); double dropFractionPrev = *itDrop; ++itDrop; + // size_t layerNumber = 0; + for (auto& layer : layers ()) { if (itDrop == itDropEnd) break; - size_t _numNodes = layer.numNodes (); + // size_t _numNodes = layer.numNodes (); double dropFraction = *itDrop; double pPrev = 1.0 - dropFractionPrev; @@ -683,18 +685,19 @@ template { p = 1.0/p; } - size_t _numWeights = layer.numWeights (numNodesPrev); - for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight) + // size_t _numWeights = layer.numWeights (numNodesPrev); + for (size_t iWeightBucket = 0; iWeightBucket < BUCKET_SIZE; ++iWeightBucket) { - if (itWeight == itWeightEnd) + if (itWeightBucket == itWeightBucketEnd) break; - weightBucket[hasherFunction(itWeight) % BUCKET_SIZE] *= p; - ++itWeight; + weightBucket[itWeightBucket] *= p; + ++itWeightBucket; } - numNodesPrev = _numNodes; + // numNodesPrev = _numNodes; dropFractionPrev = dropFraction; ++itDrop; + // ++layerNumber; } } @@ -1221,7 +1224,7 @@ template LayerData& prevLayerData = layerData.at (idxLayer); LayerData& currLayerData = layerData.at (idxLayer+1); - forward (prevLayerData, currLayerData, weightBucket); + forward (prevLayerData, currLayerData, weightBucket, idxLayer); applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ()); } @@ -1261,7 +1264,7 @@ template LayerData& currLayerData = currLayerPatternData.at (idxPattern); - forward (prevLayerData, currLayerData, weightBucket); // feed forward + forward (prevLayerData, currLayerData, weightBucket, idxLayer); // feed forward } // ---------------- loop over layerDatas of pattern apply non-linearities ---------------------------- @@ -1334,9 +1337,8 @@ template double sumError (0.0); size_t idxPattern = 0; - for ( ; 
itPattern != itPatternEnd; ++itPattern, ++itLayerData, ++itLayerDataNext) + for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData, ++itLayerDataNext, ++idxPattern) { - ++idxPattern; // compute E and the deltas of the computed output and the true output LayerData& layerData = (*itLayerData); @@ -1344,7 +1346,7 @@ template const Pattern& _pattern = (*itPattern); double error = errorFunction (layerData, nextLayerData, _pattern.output (), _pattern.weight (), weightBucket, settings.factorWeightDecay (), - settings.regularization ()); + settings.regularization (), idxPattern); sumWeights += fabs (_pattern.weight ()); sumError += error; } @@ -1382,7 +1384,7 @@ template LayerData& currLayerData = (*itCurrLayerData); LayerData& prevLayerData = *(itPrevLayerData); - backward (prevLayerData, currLayerData, weightBucket); + backward (prevLayerData, currLayerData, weightBucket, idxLayer); // the factorWeightDecay has to be scaled by 1/n where n is the number of weights (synapses) // because L1 and L2 regularization @@ -1391,7 +1393,7 @@ template // // L1 : -factorWeightDecay*sgn(w)/numWeights // L2 : -factorWeightDecay/numWeights - update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization (), weightBucket, gradientBucket); + update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization (), weightBucket, gradientBucket, idxLayer); } } } @@ -1592,7 +1594,7 @@ template double patternWeight, std::vector& weightBucket, double factorWeightDecay, - EnumRegularization eRegularization) const + EnumRegularization eRegularization, size_t layerNumber) const { double error (0); switch (m_eErrorFunction) @@ -1630,7 +1632,7 @@ template } if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE) { - error = weightDecay (error, layerData.weightsBegin (), nextLayerData.weightsBegin (), weightBucket, factorWeightDecay, eRegularization); + error = weightDecay (error, layerData.weightsBegin (), nextLayerData.weightsBegin (), weightBucket, factorWeightDecay, eRegularization, layerNumber); } return error; } From e048347bc1d9be351c9cada93f25d802fd3fff7a Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Sun, 28 Aug 2016 09:45:10 +0530 Subject: [PATCH 37/42] Update NeuralNet.icc --- tmva/tmva/inc/TMVA/NeuralNet.icc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index f58cb8cbcea41..dfa69312ea06e 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -1384,7 +1384,7 @@ template LayerData& currLayerData = (*itCurrLayerData); LayerData& prevLayerData = *(itPrevLayerData); - backward (prevLayerData, currLayerData, weightBucket, idxLayer); + backward (prevLayerData, currLayerData, weightBucket, idxLayer-1); // the factorWeightDecay has to be scaled by 1/n where n is the number of weights (synapses) // because L1 and L2 regularization @@ -1393,7 +1393,7 @@ template // // L1 : -factorWeightDecay*sgn(w)/numWeights // L2 : -factorWeightDecay/numWeights - update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization (), weightBucket, gradientBucket, idxLayer); + update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization (), weightBucket, gradientBucket, idxLayer-1); } } } From 24c19c282d0c5f17b22bb80e528df7954445babf Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Sun, 28 Aug 2016 21:03:17 +0530 
Subject: [PATCH 38/42] Production version v1.0 --- tmva/tmva/inc/TMVA/MethodDNN.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tmva/tmva/inc/TMVA/MethodDNN.h b/tmva/tmva/inc/TMVA/MethodDNN.h index 1655e713bb21f..af7dc3cb2e964 100644 --- a/tmva/tmva/inc/TMVA/MethodDNN.h +++ b/tmva/tmva/inc/TMVA/MethodDNN.h @@ -131,6 +131,7 @@ namespace TMVA { private: TMVA::DNN::Net fNet; std::vector fWeightBucket; + int fBucketSize; TString fLayoutString; std::vector> fLayout; From 3c7f7ac59b9448c9ac61b9ea164931b8444db2cf Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Sun, 28 Aug 2016 21:04:00 +0530 Subject: [PATCH 39/42] Production Version v1.0 --- tmva/tmva/inc/TMVA/NeuralNet.h | 37 +++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index b0336f8416598..aac47572fabca 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -62,7 +62,7 @@ namespace TMVA // double gaussDoubl (edouble mean, double sigma); - const int BUCKET_SIZE = 8; // ------------------------------- Declare Bucket Size -------------------------------------------- + // const int BUCKET_SIZE = 8; // ------------------------------- Declare Bucket Size -------------------------------------------- /*! \brief Hash initialization * * @@ -268,12 +268,12 @@ namespace TMVA template - void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, int itWeight, std::vector& weightBucket, size_t layerNumber, ItTarget itTargetBegin, ItTarget itTargetEnd); + void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, int itWeight, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE, ItTarget itTargetBegin, ItTarget itTargetEnd); template - void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, size_t layerNumber, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop); + void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop); @@ -292,7 +292,7 @@ namespace TMVA void update (ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, - int itGradient, std::vector& gradientBucket, size_t layerNumber); + int itGradient, std::vector& gradientBucket, size_t layerNumber, int BUCKET_SIZE); @@ -301,7 +301,7 @@ namespace TMVA ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, int itGradient, std::vector& gradientBucket, - int itWeight, std::vector& weightBucket, double& factorWeightDecay, size_t layerNumber); + int itWeight, std::vector& weightBucket, double& factorWeightDecay, size_t layerNumber, int BUCKET_SIZE); @@ -376,7 +376,7 @@ namespace TMVA * called */ template - double operator() (Function& fitnessFunction, std::vector& weightBucket, PassThrough& passThrough, const size_t& numWeights, std::vector& layerWeightNumber); + double operator() (Function& fitnessFunction, std::vector& weightBucket, PassThrough& passThrough, const size_t& numWeights, std::vector& layerWeightNumber, const int& BUCKET_SIZE); double m_alpha; ///< internal parameter (learningRate) @@ -420,7 +420,7 @@ namespace TMVA template - double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization, size_t 
layerNumber); + double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization, size_t layerNumber, int BUCKET_SIZE); @@ -726,15 +726,15 @@ namespace TMVA template - void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber); + void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE); template - void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber); + void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE); template - void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket, size_t layerNumber); + void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket, size_t layerNumber, int BUCKET_SIZE); @@ -752,7 +752,7 @@ namespace TMVA */ Settings (TString name, size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7, - double _factorWeightDecay = 1e-5, TMVA::DNN::EnumRegularization _regularization = TMVA::DNN::EnumRegularization::NONE, + double _factorWeightDecay = 1e-5, int _bucketSize = 8, TMVA::DNN::EnumRegularization _regularization = TMVA::DNN::EnumRegularization::NONE, MinimizerType _eMinimizerType = MinimizerType::fSteepest, double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3, @@ -788,6 +788,7 @@ namespace TMVA double momentum () const { return fMomentum; } ///< get the momentum (e.g. for SGD) int repetitions () const { return fRepetitions; } ///< how many steps have to be gone until the batch is changed MinimizerType minimizerType () const { return fMinimizerType; } ///< which minimizer shall be used (e.g. 
SGD) + int bucketSize () const { return fBucketSize; } ///< Number of Weight Buckets per Layer @@ -869,6 +870,7 @@ namespace TMVA double fMomentum; int fRepetitions; MinimizerType fMinimizerType; + int fBucketSize; size_t m_convergenceCount; size_t m_maxConvergenceCount; @@ -916,11 +918,11 @@ namespace TMVA */ ClassificationSettings (TString name, size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7, - double _factorWeightDecay = 1e-5, EnumRegularization _regularization = EnumRegularization::NONE, + double _factorWeightDecay = 1e-5, int _bucketSize = 8, EnumRegularization _regularization = EnumRegularization::NONE, size_t _scaleToNumEvents = 0, MinimizerType _eMinimizerType = MinimizerType::fSteepest, double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3, bool _useMultithreading = true) - : Settings (name, _convergenceSteps, _batchSize, _testRepetitions, _factorWeightDecay, + : Settings (name, _convergenceSteps, _batchSize, _testRepetitions, _factorWeightDecay, _bucketSize, _regularization, _eMinimizerType, _learningRate, _momentum, _repetitions, _useMultithreading) , m_ams () , m_sumOfSigWeights (0) @@ -1091,6 +1093,7 @@ namespace TMVA : m_eErrorFunction (ModeErrorFunction::SUMOFSQUARES) , m_sizeInput (0) , m_layers () + , m_bucketSize (8) { } @@ -1102,6 +1105,7 @@ namespace TMVA : m_eErrorFunction (other.m_eErrorFunction) , m_sizeInput (other.m_sizeInput) , m_layers (other.m_layers) + , m_bucketSize (other.m_bucketSize) { } @@ -1166,13 +1170,13 @@ namespace TMVA template void forwardPattern (const LayerContainer& _layers, - std::vector& layerData, std::vector& weightBucket) const; + std::vector& layerData, std::vector& weightBucket, int BUCKET_SIZE) const; size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net size_t numNodes (size_t trainingStartLayer = 0) const; ///< returns the number of nodes in this net template - std::vector compute (const std::vector& input, Weights& weightBucket) const; ///< compute the net with the given input and the given weights + std::vector compute (const std::vector& input, Weights& weightBucket, int BUCKET_SIZE) const; ///< compute the net with the given input and the given weights template double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients @@ -1273,7 +1277,7 @@ namespace TMVA template void initializeWeights (WeightInitializationStrategy eInitStrategy, - OutIterator itWeight, std::vector& layerWeightNumber); ///< initialize the weights with the given strategy + OutIterator itWeight, std::vector& layerWeightNumber, int BUCKET_SIZE); ///< initialize the weights with the given strategy protected: @@ -1287,6 +1291,7 @@ namespace TMVA size_t m_sizeInput; ///< input size of this DNN size_t m_sizeOutput; ///< outut size of this DNN std::vector m_layers; ///< layer-structure-data + int m_bucketSize; }; From e6332d5de059bb6431bd8ae31d4b1f6a75b08a8f Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Sun, 28 Aug 2016 21:04:37 +0530 Subject: [PATCH 40/42] Production Version v1.0 --- tmva/tmva/inc/TMVA/NeuralNet.icc | 70 ++++++++++++++++---------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index dfa69312ea06e..ae0e7c884b493 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -79,7 +79,7 @@ namespace TMVA 
*/ template void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, - int itWeight, std::vector& weightBucket, size_t layerNumber, + int itWeight, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE, ItTarget itTargetBegin, ItTarget itTargetEnd, ItDrop itDrop) { @@ -105,7 +105,7 @@ template - void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, size_t layerNumber, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop) + void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop) { for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev) { @@ -170,7 +170,7 @@ template void update (ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, - int itGradient, std::vector& gradientBucket, size_t layerNumber) + int itGradient, std::vector& gradientBucket, size_t layerNumber, int BUCKET_SIZE) { while (itSource != itSourceEnd) { @@ -225,7 +225,7 @@ template ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, int itGradient, std::vector& gradientBucket, - int itWeight, std::vector& weightBucket, double& factorWeightDecay, size_t layerNumber) + int itWeight, std::vector& weightBucket, double& factorWeightDecay, size_t layerNumber, int BUCKET_SIZE) { // ! the factor weightDecay has to be already scaled by 1/n where n is the number of weights while (itSource != itSourceEnd) @@ -254,7 +254,7 @@ template * Can be used with multithreading (i.e. "HogWild!" style); see call in trainCycle */ template - double Steepest::operator() (Function& fitnessFunction, std::vector& weightBucket, PassThrough& passThrough, const size_t& numWeights, std::vector& layerWeightNumber) + double Steepest::operator() (Function& fitnessFunction, std::vector& weightBucket, PassThrough& passThrough, const size_t& numWeights, std::vector& layerWeightNumber, const int& BUCKET_SIZE) { // std::vector gradients (numWeights, 0.0); std::vector gradientBucket (weightBucket.size (), 0.0); @@ -490,7 +490,7 @@ template * */ template - double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization, size_t layerNumber) + double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization, size_t layerNumber, int BUCKET_SIZE) { if (eRegularization == EnumRegularization::L1) { @@ -540,12 +540,12 @@ template * */ template - void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber) + void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE) { if (prevLayerData.hasDropOut ()) { applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.weightsBegin (), weightBucket, layerNumber, + currLayerData.weightsBegin (), weightBucket, layerNumber, BUCKET_SIZE, currLayerData.valuesBegin (), currLayerData.valuesEnd (), prevLayerData.dropOut ()); } @@ -553,7 +553,7 @@ template { bool dummy = true; applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.weightsBegin (), weightBucket, layerNumber, + 
currLayerData.weightsBegin (), weightBucket, layerNumber, BUCKET_SIZE, currLayerData.valuesBegin (), currLayerData.valuesEnd (), &dummy); // dummy to turn on all nodes (no drop out) } @@ -566,12 +566,12 @@ template * */ template - void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber) + void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE) { if (prevLayerData.hasDropOut ()) { applyWeightsBackwards (currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.weightsBegin (), weightBucket, layerNumber, + currLayerData.weightsBegin (), weightBucket, layerNumber, BUCKET_SIZE, prevLayerData.deltasBegin (), prevLayerData.deltasEnd (), prevLayerData.dropOut ()); } @@ -579,7 +579,7 @@ template { bool dummy = true; applyWeightsBackwards (currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.weightsBegin (), weightBucket, layerNumber, + currLayerData.weightsBegin (), weightBucket, layerNumber, BUCKET_SIZE, prevLayerData.deltasBegin (), prevLayerData.deltasEnd (), &dummy); // dummy to use all nodes (no drop out) } @@ -594,7 +594,7 @@ template * */ template - void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket, size_t layerNumber) + void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket, size_t layerNumber, int BUCKET_SIZE) { // ! the "factorWeightDecay" has already to be scaled by 1/n where n is the number of weights if (factorWeightDecay != 0.0) // has weight regularization @@ -605,7 +605,7 @@ template currLayerData.deltasEnd (), currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (), gradientBucket, - currLayerData.weightsBegin (), weightBucket, factorWeightDecay, layerNumber); + currLayerData.weightsBegin (), weightBucket, factorWeightDecay, layerNumber, BUCKET_SIZE); } else if (regularization == EnumRegularization::L2) // L2 regularization ( sum(w^2) ) { @@ -614,14 +614,14 @@ template currLayerData.deltasEnd (), currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (), gradientBucket, - currLayerData.weightsBegin (), weightBucket, factorWeightDecay, layerNumber); + currLayerData.weightsBegin (), weightBucket, factorWeightDecay, layerNumber, BUCKET_SIZE); } else { update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), currLayerData.deltasBegin (), currLayerData.deltasEnd (), currLayerData.valueGradientsBegin (), - currLayerData.gradientsBegin (), gradientBucket, layerNumber); + currLayerData.gradientsBegin (), gradientBucket, layerNumber, BUCKET_SIZE); } else @@ -629,7 +629,7 @@ template update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), currLayerData.deltasBegin (), currLayerData.deltasEnd (), currLayerData.valueGradientsBegin (), - currLayerData.gradientsBegin (), gradientBucket, layerNumber); + currLayerData.gradientsBegin (), gradientBucket, layerNumber, BUCKET_SIZE); } } @@ -686,7 +686,7 @@ template p = 1.0/p; } // size_t _numWeights = layer.numWeights (numNodesPrev); - for (size_t iWeightBucket = 0; iWeightBucket < BUCKET_SIZE; ++iWeightBucket) + for (size_t iWeightBucket = 0; iWeightBucket < m_bucketSize; ++iWeightBucket) { if (itWeightBucket == itWeightBucketEnd) break; @@ -728,6 +728,8 @@ template 
settings.create ("trainErrors", 100, 0, 100, 100, 0,1); settings.create ("testErrors", 100, 0, 100, 100, 0,1); + m_bucketSize = settings.bucketSize (); + size_t cycleCount = 0; size_t testCycleCount = 0; double testError = 1e20; @@ -935,6 +937,7 @@ template size_t numPattern = std::distance (itPatternBegin, itPatternEnd); size_t numBatches = numPattern/settings.batchSize (); size_t numBatches_stored = numBatches; + const int const_m_bucketSize = m_bucketSize; std::random_shuffle (itPatternBegin, itPatternEnd); Iterator itPatternBatchBegin = itPatternBegin; @@ -989,7 +992,7 @@ template { Batch& batch = *it; pass_through_type settingsAndBatch (settings, batch, dropContainer); - localError += minimizer ((*this), weightBucket, settingsAndBatch, numWeights (), layerWeightNumber); /// call the minimizer + localError += minimizer ((*this), weightBucket, settingsAndBatch, numWeights (), layerWeightNumber, const_m_bucketSize); /// call the minimizer } return localError; }) @@ -1004,7 +1007,7 @@ template for (auto& batch : batches) { pass_through_type settingsAndBatch (settings, batch, dropContainer); - error += minimizer ((*this), weightBucket, settingsAndBatch, numWeights (), layerWeightNumber); + error += minimizer ((*this), weightBucket, settingsAndBatch, numWeights (), layerWeightNumber, const_m_bucketSize); } } @@ -1025,7 +1028,7 @@ template * \param weights the weight data */ template - std::vector Net::compute (const std::vector& input, Weights& weightBucket) const + std::vector Net::compute (const std::vector& input, Weights& weightBucket, int BUCKET_SIZE) const { std::vector layerData; layerData.reserve (m_layers.size ()+1); @@ -1048,7 +1051,7 @@ template // --------- forward ------------- - forwardPattern (m_layers, layerData, weightBucket); + forwardPattern (m_layers, layerData, weightBucket, BUCKET_SIZE); // ------------- fetch output ------------------ std::vector output; @@ -1061,7 +1064,7 @@ template double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket) const { std::vector nothing; // empty gradients; no backpropagation is done, just forward - // assert (numWeights () == (weightBucket.size() * BUCKET_SIZE)); + double error = forward_backward(m_layers, settingsAndBatch, 0, numWeights () - 1, 0, 0, 10000, nothing, false, weightBucket, nothing); return error; } @@ -1070,7 +1073,7 @@ template double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, ModeOutput /*eFetch*/, OutContainer& outputContainer) const { std::vector nothing; // empty gradients; no backpropagation is done, just forward - // assert (numWeights () == (weightBucket.size () * BUCKET_SIZE)); + double error = forward_backward(m_layers, settingsAndBatch, 0, numWeights () - 1, 0, 0, 10000, outputContainer, true, weightBucket, nothing); return error; } @@ -1081,8 +1084,7 @@ template { std::vector nothing; // std::cout<<"\nnumWeights = "< ptrSettings = make_shared ( GetName (), convergenceSteps, batchSize, - testRepetitions, factorWeightDecay, + testRepetitions, factorWeightDecay, bucketSize, eRegularization, fScaleToNumEvents, TMVA::DNN::MinimizerType::fSteepest, learningRate, momentum, repetitions, multithreading); @@ -463,7 +465,7 @@ void TMVA::MethodDNN::ProcessOptions() std::shared_ptr ptrSettings = make_shared ( GetName (), convergenceSteps, batchSize, - testRepetitions, factorWeightDecay, + testRepetitions, factorWeightDecay, bucketSize, eRegularization, TMVA::DNN::MinimizerType::fSteepest, learningRate, momentum, repetitions, multithreading); @@ -474,7 +476,7 @@ void 
TMVA::MethodDNN::ProcessOptions() std::shared_ptr ptrSettings = make_shared ( GetName (), convergenceSteps, batchSize, - testRepetitions, factorWeightDecay, + testRepetitions, factorWeightDecay, bucketSize, eRegularization, TMVA::DNN::MinimizerType::fSteepest, learningRate, momentum, repetitions, multithreading); @@ -602,7 +604,7 @@ void TMVA::MethodDNN::Train() // initialize weights fNet.initializeWeights (fWeightInitializationStrategy, - std::back_inserter (fWeightBucket), layerWeightNumber); + std::back_inserter (fWeightBucket), layerWeightNumber, fBucketSize); } @@ -659,7 +661,7 @@ Double_t TMVA::MethodDNN::GetMvaValue( Double_t* /*errLower*/, Double_t* /*errUp const std::vector& inputValues = GetEvent ()->GetValues (); std::vector input (inputValues.begin (), inputValues.end ()); input.push_back (1.0); // bias node - std::vector output = fNet.compute (input, fWeightBucket); + std::vector output = fNet.compute (input, fWeightBucket, fBucketSize); if (output.empty ()) return 0.0; @@ -680,7 +682,7 @@ const std::vector &TMVA::MethodDNN::GetRegressionValues() const std::vector& inputValues = ev->GetValues (); std::vector input (inputValues.begin (), inputValues.end ()); input.push_back (1.0); // bias node - std::vector output = fNet.compute (input, fWeightBucket); + std::vector output = fNet.compute (input, fWeightBucket, fBucketSize); if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector(); fRegressionReturnVal->clear(); @@ -724,7 +726,7 @@ const std::vector &TMVA::MethodDNN::GetMulticlassValues() const std::vector& inputValues = GetEvent ()->GetValues (); std::vector input (inputValues.begin (), inputValues.end ()); input.push_back (1.0); // bias node - std::vector output = fNet.compute (input, fWeightBucket); + std::vector output = fNet.compute (input, fWeightBucket, fBucketSize); // check the output of the network @@ -1100,9 +1102,9 @@ void TMVA::MethodDNN::checkGradients () fNet.addLayer (DNN::Layer (outputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::SIGMOID)); fNet.setErrorFunction (DNN::ModeErrorFunction::CROSSENTROPY); // net.setErrorFunction (ModeErrorFunction::SUMOFSQUARES); - + const int BUCKET_SIZE = 8; size_t numWeights = fNet.numWeights (inputSize); - std::vector weightBucket (numWeights / TMVA::DNN::BUCKET_SIZE); + std::vector weightBucket (numWeights / BUCKET_SIZE); //weights.at (0) = 1000213.2; std::vector pattern; @@ -1122,7 +1124,7 @@ void TMVA::MethodDNN::checkGradients () } - DNN::Settings settings (TString ("checkGradients"), /*_convergenceSteps*/ 15, /*_batchSize*/ 1, /*_testRepetitions*/ 7, /*_factorWeightDecay*/ 0, /*regularization*/ TMVA::DNN::EnumRegularization::NONE); + DNN::Settings settings (TString ("checkGradients"), /*_convergenceSteps*/ 15, /*_batchSize*/ 1, /*_testRepetitions*/ 7, /*_factorWeightDecay*/ 0, /*_bucketSize*/ 8, /*regularization*/ TMVA::DNN::EnumRegularization::NONE); size_t improvements = 0; size_t worsenings = 0; @@ -1131,7 +1133,7 @@ void TMVA::MethodDNN::checkGradients () for (size_t iTest = 0; iTest < 1000; ++iTest) { TMVA::DNN::uniformDouble (weightBucket, 0.7); - std::vector gradientBucket (numWeights / TMVA::DNN::BUCKET_SIZE, 0); + std::vector gradientBucket (numWeights / BUCKET_SIZE, 0); DNN::Batch batch (begin (pattern), end (pattern)); DNN::DropContainer dropContainer; std::tuple settingsAndBatch (settings, batch, dropContainer); @@ -1139,11 +1141,11 @@ void TMVA::MethodDNN::checkGradients () std::vector changedWeightBucket; changedWeightBucket.assign (weightBucket.begin (), weightBucket.end ()); - int 
changeWeightPosition = TMVA::DNN::randomInt (numWeights / TMVA::DNN::BUCKET_SIZE); + int changeWeightPosition = TMVA::DNN::randomInt (numWeights / BUCKET_SIZE); double dEdw = gradientBucket.at (changeWeightPosition); while (dEdw == 0.0) { - changeWeightPosition = TMVA::DNN::randomInt (numWeights / TMVA::DNN::BUCKET_SIZE); + changeWeightPosition = TMVA::DNN::randomInt (numWeights / BUCKET_SIZE); dEdw = gradientBucket.at (changeWeightPosition); } From 6eed664e5a7a828d995ec8e115fdd5562839f3db Mon Sep 17 00:00:00 2001 From: Aditya Sharma Date: Sun, 28 Aug 2016 21:06:02 +0530 Subject: [PATCH 42/42] Production Version v1.0 --- tmva/tmva/src/NeuralNet.cxx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tmva/tmva/src/NeuralNet.cxx b/tmva/tmva/src/NeuralNet.cxx index 0fccdb5a87a44..b8670cf1e1ccf 100644 --- a/tmva/tmva/src/NeuralNet.cxx +++ b/tmva/tmva/src/NeuralNet.cxx @@ -220,7 +220,7 @@ namespace TMVA Settings::Settings (TString name, size_t _convergenceSteps, size_t _batchSize, size_t _testRepetitions, - double _factorWeightDecay, EnumRegularization eRegularization, + double _factorWeightDecay, int _bucketSize, EnumRegularization eRegularization, MinimizerType _eMinimizerType, double _learningRate, double _momentum, int _repetitions, bool _useMultithreading) : m_timer (100, name) @@ -244,6 +244,7 @@ namespace TMVA , m_minError (1e10) , m_useMultithreading (_useMultithreading) , fMonitoring (NULL) + , fBucketSize (_bucketSize) { }