4 changes: 4 additions & 0 deletions tmva/tmva/inc/TMVA/NeuralNet.h
@@ -39,6 +39,7 @@
#include <thread>
#include <future>
#include <type_traits>
+ #include <mutex>

#include "Pattern.h"
#include "Monitoring.h"
@@ -372,6 +373,9 @@ namespace TMVA
double m_alpha; ///< internal parameter (learningRate)
double m_beta; ///< internal parameter (momentum)
std::vector<double> m_prevGradients; ///< vector remembers the gradients of the previous step
+
+ std::vector<double> m_localWeights; ///< local weights for reuse in thread.
+ std::vector<double> m_localGradients; ///< local gradients for reuse in thread.
};


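Note: the two new members turn what were per-call temporaries into reusable scratch space owned by the minimizer. A minimal sketch of that pattern, with illustrative names (`ScratchMinimizer` and `step` are not TMVA API): `assign` overwrites the contents but keeps the capacity acquired on earlier calls, so the hot loop stops reallocating.

```cpp
#include <vector>

// Illustrative only: the member names mimic the patch, but the class
// and step() are stand-ins, not TMVA API.
class ScratchMinimizer
{
public:
    double step (const std::vector<double>& weights)
    {
        // assign() reuses capacity acquired on earlier calls; a local
        // std::vector here would reallocate on every invocation.
        m_localGradients.assign (weights.size (), 0.0);
        m_localWeights.assign (weights.begin (), weights.end ());
        // ... work on m_localWeights / m_localGradients ...
        return 0.0; // placeholder error
    }

private:
    std::vector<double> m_localWeights;   // scratch copy of the weights
    std::vector<double> m_localGradients; // scratch gradient accumulator
};
```

On a path called once per batch this keeps allocation out of the training loop, at the cost that a minimizer instance is no longer safe to share across threads; the last hunk below addresses that by cloning.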
38 changes: 21 additions & 17 deletions tmva/tmva/inc/TMVA/NeuralNet.icc
@@ -264,8 +264,11 @@ template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev
double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
{
size_t numWeights = weights.size ();
- std::vector<double> gradients (numWeights, 0.0);
- std::vector<double> localWeights (begin (weights), end (weights));
+ // std::vector<double> gradients (numWeights, 0.0);
+ m_localGradients.assign (numWeights, 0.0);
+ // std::vector<double> localWeights (begin (weights), end (weights));
+ // m_localWeights.reserve (numWeights);
+ m_localWeights.assign (begin (weights), end (weights));

double E = 1e10;
if (m_prevGradients.size () != numWeights)
@@ -281,28 +284,28 @@ template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev
if (currentRepetition >= m_repetitions)
break;

- gradients.assign (numWeights, 0.0);
+ m_localGradients.assign (numWeights, 0.0);

// --- nesterov momentum ---
// apply momentum before computing the new gradient
auto itPrevG = begin (m_prevGradients);
auto itPrevGEnd = end (m_prevGradients);
- auto itLocWeight = begin (localWeights);
+ auto itLocWeight = begin (m_localWeights);
for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
{
(*itPrevG) *= m_beta;
(*itLocWeight) += (*itPrevG);
}

- E = fitnessFunction (passThrough, localWeights, gradients);
+ E = fitnessFunction (passThrough, m_localWeights, m_localGradients);
// plotGradients (gradients);
// plotWeights (localWeights);

double alpha = gaussDouble (m_alpha, m_alpha/2.0);
- // double alpha = m_alpha;
+ // double alpha = m_alpha;

- auto itG = begin (gradients);
- auto itGEnd = end (gradients);
+ auto itG = begin (m_localGradients);
+ auto itGEnd = end (m_localGradients);
itPrevG = begin (m_prevGradients);
double maxGrad = 0.0;
for (; itG != itGEnd; ++itG, ++itPrevG)
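For context, the loop above is the look-ahead half of Nesterov momentum: the decayed previous step is added to the weights before the new gradient is evaluated, and the gradient loop that follows (partly collapsed here) folds the fresh gradient into `m_prevGradients`. A self-contained sketch of one such step; the sign convention in the fold-in is an assumption, since the authoritative lines are collapsed:

```cpp
#include <cstddef>
#include <functional>
#include <vector>

// Sketch of one Nesterov-momentum step mirroring the loop above.
// `fitness` fills `gradients` at the supplied weights and returns the error.
double nesterovStep (std::vector<double>& weights,
                     std::vector<double>& prevGradients,
                     double alpha, double beta,
                     const std::function<double (std::vector<double>&,
                                                 std::vector<double>&)>& fitness)
{
    std::vector<double> gradients (weights.size (), 0.0);

    // look-ahead: apply the decayed previous step before evaluating
    for (std::size_t i = 0; i < weights.size (); ++i)
    {
        prevGradients[i] *= beta;
        weights[i] += prevGradients[i];
    }

    double error = fitness (weights, gradients);

    // fold the fresh gradient into the remembered step and apply it
    // (sign convention assumed: descend along the gradient)
    for (std::size_t i = 0; i < weights.size (); ++i)
    {
        double step = -alpha * gradients[i];
        weights[i] += step;
        prevGradients[i] += step;
    }
    return error;
}
```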
@@ -333,7 +336,7 @@ template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev
else
{
auto itW = std::begin (weights);
- std::for_each (std::begin (gradients), std::end (gradients), [&itW](double& g)
+ std::for_each (std::begin (m_localGradients), std::end (m_localGradients), [&itW](double& g)
{
*itW += g;
++itW;
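The captured-iterator `for_each` walks `weights` in lockstep with the gradients. Purely as a side note, the same element-wise update can be written with `std::transform` over the two ranges; a sketch, not part of the patch:

```cpp
#include <algorithm>
#include <functional>
#include <vector>

// Element-wise weights[i] += gradients[i], expressed over two ranges.
void applyGradients (std::vector<double>& weights,
                     const std::vector<double>& gradients)
{
    std::transform (weights.begin (), weights.end (),
                    gradients.begin (), weights.begin (),
                    std::plus<double> ());
}
```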
@@ -781,7 +784,7 @@ template <typename LAYERDATA>
size_t patternPerThread = testPattern.size () / numThreads;
std::vector<Batch> batches;
auto itPat = testPattern.begin ();
- // auto itPatEnd = testPattern.end ();
+ // auto itPatEnd = testPattern.end ();
for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
{
batches.push_back (Batch (itPat, itPat + patternPerThread));
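The visible lines split `testPattern` into `numThreads` batches; the collapsed remainder presumably hands everything left over to the final batch. A standalone sketch of that partitioning, with `Pattern` and `Batch` reduced to stand-ins for TMVA's richer types:

```cpp
#include <utility>
#include <vector>

// Illustrative stand-ins; TMVA's Pattern/Batch carry more state.
using Pattern = double;
using PatIt   = std::vector<Pattern>::const_iterator;
using Batch   = std::pair<PatIt, PatIt>;

// Assumes numThreads >= 1.
std::vector<Batch> splitIntoBatches (const std::vector<Pattern>& patterns,
                                     std::size_t numThreads)
{
    std::size_t perThread = patterns.size () / numThreads;
    std::vector<Batch> batches;
    auto itPat = patterns.begin ();
    for (std::size_t idxThread = 0; idxThread < numThreads - 1; ++idxThread)
    {
        batches.push_back (Batch (itPat, itPat + perThread));
        itPat += perThread;
    }
    // the last batch runs to the end and absorbs the remainder
    batches.push_back (Batch (itPat, patterns.end ()));
    return batches;
}
```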
@@ -798,24 +801,24 @@ template <typename LAYERDATA>
std::async (std::launch::async, [&]()
{
std::vector<double> localOutput;
- pass_through_type passThrough (settings, batch, dropContainerTest);
+ pass_through_type passThrough (settings, batch, dropContainerTest);
double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
return std::make_tuple (testBatchError, localOutput);
})
);
}

- auto itBatch = batches.begin ();
+ auto itBatch = batches.begin ();
for (auto& f : futures)
{
std::tuple<double,std::vector<double>> result = f.get ();
testError += std::get<0>(result) / batches.size ();
std::vector<double> output = std::get<1>(result);

- //if (output.size () == testPattern.size ())
+ //if (output.size () == testPattern.size ())
{
- //auto it = begin (testPattern);
- auto it = (*itBatch).begin ();
+ //auto it = begin (testPattern);
+ auto it = (*itBatch).begin ();
for (double out : output)
{
settings.testSample (0, out, (*it).output ().at (0), (*it).weight ());
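Each batch is scored in its own `std::async` task that returns the batch error together with the network outputs, and the futures are drained in launch order so outputs stay aligned with `itBatch`. A reduced sketch of that fan-out/fan-in shape, with a placeholder work function standing in for the network evaluation:

```cpp
#include <future>
#include <numeric>
#include <tuple>
#include <vector>

// Fan out one async task per batch, fan in via future::get().
double evaluateBatches (const std::vector<std::vector<double>>& batches)
{
    std::vector<std::future<std::tuple<double, std::vector<double>>>> futures;
    for (const auto& batch : batches)
    {
        futures.push_back (std::async (std::launch::async, [&batch]()
        {
            // placeholder "error": sum of the batch contents
            double err = std::accumulate (batch.begin (), batch.end (), 0.0);
            return std::make_tuple (err, batch); // error + per-pattern output
        }));
    }

    double totalError = 0.0;
    for (auto& f : futures)
    {
        auto result = f.get (); // blocks until this task has finished
        totalError += std::get<0> (result) / batches.size ();
    }
    return totalError;
}
```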
@@ -982,8 +985,9 @@ template <typename LAYERDATA>
for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
{
Batch& batch = *it;
- pass_through_type settingsAndBatch (settings, batch, dropContainer);
- localError += minimizer ((*this), weights, settingsAndBatch); /// call the minimizer
+ pass_through_type settingsAndBatch (settings, batch, dropContainer);
+ Minimizer minimizerClone (minimizer);
+ localError += minimizerClone ((*this), weights, settingsAndBatch); /// call the minimizer
}
return localError;
})
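This last hunk is what motivates the new member buffers: once `Steepest::operator()` writes into members, sharing one minimizer across training threads would race on that state, so each batch loop now works on a copy. A stripped-down sketch of the idea, assuming the minimizer is cheap to copy and, in this toy version, only reads the shared weights:

```cpp
#include <future>
#include <vector>

struct Minimizer
{
    std::vector<double> m_scratch; // mutated by operator(), so not shareable

    double operator() (const std::vector<double>& weights)
    {
        m_scratch.assign (weights.size (), 0.0);
        // ... minimisation step using m_scratch ...
        return 0.0; // placeholder error
    }
};

// One copy of the minimizer per task: copies keep their own scratch
// buffers, so concurrent calls cannot race on m_scratch. The shared
// weights are only read here.
double trainThreaded (const Minimizer& minimizer,
                      const std::vector<double>& weights, std::size_t numTasks)
{
    std::vector<std::future<double>> futures;
    for (std::size_t i = 0; i < numTasks; ++i)
    {
        futures.push_back (std::async (std::launch::async, [&minimizer, &weights]()
        {
            Minimizer minimizerClone (minimizer); // per-task copy
            return minimizerClone (weights);
        }));
    }
    double error = 0.0;
    for (auto& f : futures) error += f.get ();
    return error;
}
```

Copying per task trades one small allocation per thread for lock-free access to the scratch buffers.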