Skip to content
Closed
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
818afb9
included xxhash.h
adi-sharma Jun 23, 2016
db6990f
added hashing initializations
adi-sharma Jun 24, 2016
efa2efb
added weightBucket
adi-sharma Jun 24, 2016
c821a57
edited applyWeights() without drop-out as an example
adi-sharma Jun 28, 2016
b6328a7
Updated all applyWeights() with HashedNets
adi-sharma Jul 1, 2016
a58ac1d
Update NeuralNet.h
adi-sharma Jul 1, 2016
c81f248
updated update() and Steepest Gradient Descent
adi-sharma Jul 1, 2016
96c8f42
Updated SGD operator() with HashedNets
adi-sharma Jul 1, 2016
0250775
updated weightDecay()
adi-sharma Jul 1, 2016
7c62f3b
updated weightDecay() with HashedNets
adi-sharma Jul 1, 2016
fe93bb2
Edited dropOutWeightFactor()
adi-sharma Jul 13, 2016
13d151a
Edited dropOutWeightFactor()
adi-sharma Jul 13, 2016
725bba3
Updated with HashedNets
adi-sharma Jul 18, 2016
4ec8917
Updated train() with HashedNets
adi-sharma Jul 18, 2016
b1caf73
Updated with complete HashedNets
adi-sharma Aug 23, 2016
c696958
Updated with complete HashedNets
adi-sharma Aug 23, 2016
8d6b2d4
Updated with Complete HashedNets
adi-sharma Aug 23, 2016
e277c73
Update MethodDNN.h
adi-sharma Aug 23, 2016
a820db6
Update NeuralNet.h
adi-sharma Aug 23, 2016
919a319
Updated with HashedNets
adi-sharma Aug 23, 2016
4d9e46c
Error corrections
adi-sharma Aug 24, 2016
3d7eec3
Error corrections NeuralNet.icc
adi-sharma Aug 24, 2016
d202ae1
Error corrections MethodDNN.cxx
adi-sharma Aug 24, 2016
52dc851
Successful compile
adi-sharma Aug 25, 2016
70484aa
Successful compile
adi-sharma Aug 25, 2016
6a0932f
Successful compile
adi-sharma Aug 25, 2016
4f5b919
Successful compile
adi-sharma Aug 25, 2016
96b29df
Successful build NeuralNet.h
adi-sharma Aug 25, 2016
f301784
Successful build NeuralNet.icc
adi-sharma Aug 25, 2016
9f1dce5
Successful build NeuralNet.cxx
adi-sharma Aug 25, 2016
f5e6943
Update NeuralNet.icc
adi-sharma Aug 26, 2016
c4f8749
Update NeuralNet.h
adi-sharma Aug 26, 2016
9637a69
Update MethodDNN.cxx
adi-sharma Aug 26, 2016
f62fcf2
Update NeuralNet.cxx
adi-sharma Aug 26, 2016
e48d24f
Made some logical changes in HashedNets
adi-sharma Aug 27, 2016
2bf3295
Made some logical changes in HashedNets
adi-sharma Aug 27, 2016
e048347
Update NeuralNet.icc
adi-sharma Aug 28, 2016
24c19c2
Production version v1.0
adi-sharma Aug 28, 2016
3c7f7ac
Production Version v1.0
adi-sharma Aug 28, 2016
e6332d5
Production Version v1.0
adi-sharma Aug 28, 2016
5c33612
Production Version v1.0
adi-sharma Aug 28, 2016
6eed664
Production Version v1.0
adi-sharma Aug 28, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Update NeuralNet.icc
  • Loading branch information
adi-sharma authored Aug 26, 2016
commit f5e69437a1c238bfc72eaf74e1e05fc27a494e85
104 changes: 63 additions & 41 deletions tmva/tmva/inc/TMVA/NeuralNet.icc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "Math/Util.h"

#include <iostream>


namespace TMVA
Expand Down Expand Up @@ -253,9 +254,8 @@ template <bool HasDropOut, typename ItSource, typename ItPrev, typename ItDrop>
* Can be used with multithreading (i.e. "HogWild!" style); see call in trainCycle
*/
template <typename Function, typename PassThrough>
double Steepest::operator() (Function& fitnessFunction, std::vector<double>& weightBucket, PassThrough& passThrough)
double Steepest::operator() (Function& fitnessFunction, std::vector<double>& weightBucket, PassThrough& passThrough, const size_t& numWeights, std::vector<int>& layerWeightNumber)
{
size_t numWeights = weightBucket.size () * BUCKET_SIZE;
// std::vector<double> gradients (numWeights, 0.0);
std::vector<double> gradientBucket (weightBucket.size (), 0.0);
std::vector<double> localWeightBucket (begin (weightBucket), end (weightBucket));
Expand All @@ -279,12 +279,18 @@ template <bool HasDropOut, typename ItSource, typename ItPrev, typename ItDrop>
// --- nesterov momentum ---
// apply momentum before computing the new gradient
int itPrevG = 0;
int itPrevGEnd = numWeights - 1;
int itPrevGEnd = numWeights;
int itLocWeight = 0;
for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
int itLWN, layerNumber = 0;

for (auto itLayerWeightNumber = layerWeightNumber.begin(); itLayerWeightNumber != layerWeightNumber.end(); ++itLayerWeightNumber, ++layerNumber)
{
(m_prevGradientBucket[hasherFunction(itPrevG) % BUCKET_SIZE]) *= m_beta;
(localWeightBucket[hasherFunction(itLocWeight) % BUCKET_SIZE]) += (m_prevGradientBucket[hasherFunction(itPrevG) % BUCKET_SIZE]);
for(itLWN = 0; itLWN < *itLayerWeightNumber; ++itLWN)
{
(m_prevGradientBucket[(hasherFunction(itPrevG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) *= m_beta;
(localWeightBucket[(hasherFunction(itLocWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) += (m_prevGradientBucket[(hasherFunction(itPrevG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]);
++itPrevG; ++itLocWeight;
}
}

E = fitnessFunction (passThrough, localWeightBucket, gradientBucket); // **************************
Expand All @@ -295,22 +301,28 @@ template <bool HasDropOut, typename ItSource, typename ItPrev, typename ItDrop>
// double alpha = m_alpha;

int itG = 0;
int itGEnd = numWeights - 1;
int itGEnd = numWeights;
itPrevG = 0;
double maxGrad = 0.0;
for (; itG != itGEnd; ++itG, ++itPrevG)
layerNumber = 0;

for (auto itLayerWeightNumber = layerWeightNumber.begin(); itLayerWeightNumber != layerWeightNumber.end(); ++itLayerWeightNumber, ++layerNumber)
{
double currGrad = (gradientBucket[hasherFunction(itG) % BUCKET_SIZE]);
double prevGrad = (m_prevGradientBucket[hasherFunction(itPrevG) % BUCKET_SIZE]);
currGrad *= alpha;

//(*itPrevG) = m_beta * (prevGrad + currGrad);
currGrad += prevGrad;
(gradientBucket[hasherFunction(itG) % BUCKET_SIZE]) = currGrad;
(m_prevGradientBucket[hasherFunction(itPrevG) % BUCKET_SIZE]) = currGrad;
for(itLWN = 0; itLWN < *itLayerWeightNumber; ++itLWN)
{
double currGrad = (gradientBucket[(hasherFunction(itG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]);
double prevGrad = (m_prevGradientBucket[(hasherFunction(itPrevG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]);
currGrad *= alpha;

if (std::fabs (currGrad) > maxGrad)
maxGrad = currGrad;
//(*itPrevG) = m_beta * (prevGrad + currGrad);
currGrad += prevGrad;
(gradientBucket[(hasherFunction(itG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) = currGrad;
(m_prevGradientBucket[(hasherFunction(itPrevG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) = currGrad;

if (std::fabs (currGrad) > maxGrad)
maxGrad = currGrad;
++itG; ++itPrevG;
}
}

if (maxGrad > 1)
Expand Down Expand Up @@ -648,7 +660,7 @@ template <typename LAYERDATA>
return;

int itWeight = 0;
int itWeightEnd = weightBucket.size() * BUCKET_SIZE;
int itWeightEnd = (int) numWeights ();
auto itDrop = std::begin (drops);
auto itDropEnd = std::end (drops);
size_t numNodesPrev = inputSize ();
Expand Down Expand Up @@ -700,7 +712,7 @@ template <typename LAYERDATA>
* \param settings the settings for the training (e.g. multithreading or not, regularization etc.)
*/
template <typename Minimizer>
double Net::train (std::vector<double>& weightBucket,
double Net::train (std::vector<double>& weightBucket, std::vector<int>& layerWeightNumber,
std::vector<Pattern>& trainPattern,
const std::vector<Pattern>& testPattern,
Minimizer& minimizer,
Expand Down Expand Up @@ -755,7 +767,7 @@ template <typename LAYERDATA>
}

// execute training cycle
trainError = trainCycle (minimizer, weightBucket, begin (trainPattern), end (trainPattern), settings, dropContainer);
trainError = trainCycle (minimizer, weightBucket, layerWeightNumber, begin (trainPattern), end (trainPattern), settings, dropContainer);


// ------ check if we have to execute a test ------------------
Expand Down Expand Up @@ -913,7 +925,7 @@ template <typename LAYERDATA>
* \param dropContainer the data for dropping-out nodes (regularization technique)
*/
template <typename Iterator, typename Minimizer>
inline double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weightBucket,
double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weightBucket, std::vector<int>& layerWeightNumber,
Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer)
{
double error = 0.0;
Expand Down Expand Up @@ -974,7 +986,7 @@ template <typename LAYERDATA>
{
Batch& batch = *it;
pass_through_type settingsAndBatch (settings, batch, dropContainer);
localError += minimizer ((*this), weightBucket, settingsAndBatch); /// call the minimizer
localError += minimizer ((*this), weightBucket, settingsAndBatch, numWeights (), layerWeightNumber); /// call the minimizer
}
return localError;
})
Expand All @@ -988,8 +1000,8 @@ template <typename LAYERDATA>
{
for (auto& batch : batches)
{
std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
error += minimizer ((*this), weightBucket, settingsAndBatch);
pass_through_type settingsAndBatch (settings, batch, dropContainer);
error += minimizer ((*this), weightBucket, settingsAndBatch, numWeights (), layerWeightNumber);
}
}

Expand Down Expand Up @@ -1046,17 +1058,17 @@ template <typename LAYERDATA>
double Net::operator() (PassThrough& settingsAndBatch, std::vector<double>& weightBucket) const
{
std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
assert (numWeights () == (weightBucket.size() * BUCKET_SIZE));
double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, 0, 10000, nothing, false, weightBucket, nothing);
// assert (numWeights () == (weightBucket.size() * BUCKET_SIZE));
double error = forward_backward(m_layers, settingsAndBatch, 0, numWeights () - 1, 0, 0, 10000, nothing, false, weightBucket, nothing);
return error;
}

template <typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, std::vector<double>& weightBucket, ModeOutput /*eFetch*/, OutContainer& outputContainer) const
{
std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
assert (numWeights () == (weightBucket.size () * BUCKET_SIZE));
double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, 0, 10000, outputContainer, true, weightBucket, nothing);
// assert (numWeights () == (weightBucket.size () * BUCKET_SIZE));
double error = forward_backward(m_layers, settingsAndBatch, 0, numWeights () - 1, 0, 0, 10000, outputContainer, true, weightBucket, nothing);
return error;
}

Expand All @@ -1065,19 +1077,21 @@ template <typename LAYERDATA>
double Net::operator() (PassThrough& settingsAndBatch, std::vector<double>& weightBucket, std::vector<double>& gradientBucket) const
{
std::vector<double> nothing;
assert (numWeights () == (weightBucket.size () * BUCKET_SIZE));
assert ((weightBucket.size () * BUCKET_SIZE) == (gradientBucket.size () * BUCKET_SIZE));
double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, (gradientBucket.size () * BUCKET_SIZE) - 1, 0, nothing, false, weightBucket, gradientBucket);
// std::cout<<"\nnumWeights = "<<numWeights ()<<"\n";
// std::cout<<"((weightBucket.size () / BUCKET_SIZE) * numWeights ()) = "<<((weightBucket.size () / BUCKET_SIZE) * numWeights ());
// assert (numWeights () == (weightBucket.size () * BUCKET_SIZE));
assert ((weightBucket.size ()) == (gradientBucket.size ()));
double error = forward_backward(m_layers, settingsAndBatch, 0, numWeights () - 1, 0, numWeights () - 1, 0, nothing, false, weightBucket, gradientBucket);
return error;
}

template <typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, std::vector<double>& weightBucket, std::vector<double>& gradientBucket, ModeOutput eFetch, OutContainer& outputContainer) const
{
MATH_UNUSED(eFetch);
assert (numWeights () == weightBucket.size () * BUCKET_SIZE);
assert (weightBucket.size () * BUCKET_SIZE == gradientBucket.size () * BUCKET_SIZE);
double error = forward_backward(m_layers, settingsAndBatch, 0, (weightBucket.size () * BUCKET_SIZE) - 1, 0, (gradientBucket.size () * BUCKET_SIZE) - 1, 0, outputContainer, true, weightBucket, gradientBucket);
// assert (numWeights () == (weightBucket.size () * BUCKET_SIZE));
assert (weightBucket.size () == gradientBucket.size ());
double error = forward_backward(m_layers, settingsAndBatch, 0, numWeights () - 1, 0, numWeights () - 1, 0, outputContainer, true, weightBucket, gradientBucket);
return error;
}

Expand Down Expand Up @@ -1455,7 +1469,7 @@ template <typename LAYERDATA>
*
*/
template <typename OutIterator>
void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight, std::vector<int>& layerWeightNumber)
{
if (eInitStrategy == WeightInitializationStrategy::XAVIER)
{
Expand All @@ -1464,18 +1478,20 @@ template <typename LAYERDATA>

// compute variance and mean of input and output
//...


// compute the weights
for (auto& layer: layers ())
{
double nIn = numInput;
double stdDev = sqrt (2.0/nIn);
for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight)
// for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight)
for (size_t iWeight = 0, iWeightEnd = (BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight)
{
(*itWeight) = DNN::gaussDouble (0.0, stdDev); // factor 2.0 for ReLU
++itWeight;
}
layerWeightNumber.push_back((int)layer.numWeights (numInput));
numInput = layer.numNodes ();
}
return;
Expand All @@ -1496,12 +1512,14 @@ template <typename LAYERDATA>
double nIn = numInput;
double minVal = -sqrt(2.0/nIn);
double maxVal = sqrt (2.0/nIn);
for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight)
// for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight)
for (size_t iWeight = 0, iWeightEnd = (BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight)
{

(*itWeight) = DNN::uniformDouble (minVal, maxVal); // factor 2.0 for ReLU
++itWeight;
}
layerWeightNumber.push_back((int)layer.numWeights (numInput));
numInput = layer.numNodes ();
}
return;
Expand All @@ -1520,11 +1538,13 @@ template <typename LAYERDATA>
for (auto& layer: layers ())
{
// double nIn = numInput;
for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight)
// for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight)
for (size_t iWeight = 0, iWeightEnd = (BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight)
{
(*itWeight) = DNN::gaussDouble (0.0, 0.1);
++itWeight;
}
layerWeightNumber.push_back((int)layer.numWeights (numInput));
numInput = layer.numNodes ();
}
return;
Expand All @@ -1543,11 +1563,13 @@ template <typename LAYERDATA>
for (auto& layer: layers ())
{
double nIn = numInput;
for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight)
// for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight)
for (size_t iWeight = 0, iWeightEnd = (BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight)
{
(*itWeight) = DNN::gaussDouble (0.0, sqrt (layer.numWeights (nIn))); // factor 2.0 for ReLU
++itWeight;
}
layerWeightNumber.push_back((int)layer.numWeights (numInput));
numInput = layer.numNodes ();
}
return;
Expand Down