Skip to content
Closed
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
818afb9
included xxhash.h
adi-sharma Jun 23, 2016
db6990f
added hashing initializations
adi-sharma Jun 24, 2016
efa2efb
added weightBucket
adi-sharma Jun 24, 2016
c821a57
edited applyWeights() without drop-out as an example
adi-sharma Jun 28, 2016
b6328a7
Updated all applyWeights() with HashedNets
adi-sharma Jul 1, 2016
a58ac1d
Update NeuralNet.h
adi-sharma Jul 1, 2016
c81f248
updated update() and Steepest Gradient Descent
adi-sharma Jul 1, 2016
96c8f42
Updated SGD operator() with HashedNets
adi-sharma Jul 1, 2016
0250775
updated weightDecay()
adi-sharma Jul 1, 2016
7c62f3b
updated weightDecay() with HashedNets
adi-sharma Jul 1, 2016
fe93bb2
Edited dropOutWeightFactor()
adi-sharma Jul 13, 2016
13d151a
Edited dropOutWeightFactor()
adi-sharma Jul 13, 2016
725bba3
Updated with HashedNets
adi-sharma Jul 18, 2016
4ec8917
Updated train() with HashedNets
adi-sharma Jul 18, 2016
b1caf73
Updated with complete HashedNets
adi-sharma Aug 23, 2016
c696958
Updated with complete HashedNets
adi-sharma Aug 23, 2016
8d6b2d4
Updated with Complete HashedNets
adi-sharma Aug 23, 2016
e277c73
Update MethodDNN.h
adi-sharma Aug 23, 2016
a820db6
Update NeuralNet.h
adi-sharma Aug 23, 2016
919a319
Updated with HashedNets
adi-sharma Aug 23, 2016
4d9e46c
Error corrections
adi-sharma Aug 24, 2016
3d7eec3
Error corrections NeuralNet.icc
adi-sharma Aug 24, 2016
d202ae1
Error corrections MethodDNN.cxx
adi-sharma Aug 24, 2016
52dc851
Successful compile
adi-sharma Aug 25, 2016
70484aa
Successful compile
adi-sharma Aug 25, 2016
6a0932f
Successful compile
adi-sharma Aug 25, 2016
4f5b919
Successful compile
adi-sharma Aug 25, 2016
96b29df
Successful build NeuralNet.h
adi-sharma Aug 25, 2016
f301784
Successful build NeuralNet.icc
adi-sharma Aug 25, 2016
9f1dce5
Successful build NeuralNet.cxx
adi-sharma Aug 25, 2016
f5e6943
Update NeuralNet.icc
adi-sharma Aug 26, 2016
c4f8749
Update NeuralNet.h
adi-sharma Aug 26, 2016
9637a69
Update MethodDNN.cxx
adi-sharma Aug 26, 2016
f62fcf2
Update NeuralNet.cxx
adi-sharma Aug 26, 2016
e48d24f
Made some logical changes in HashedNets
adi-sharma Aug 27, 2016
2bf3295
Made some logical changes in HashedNets
adi-sharma Aug 27, 2016
e048347
Update NeuralNet.icc
adi-sharma Aug 28, 2016
24c19c2
Production version v1.0
adi-sharma Aug 28, 2016
3c7f7ac
Production Version v1.0
adi-sharma Aug 28, 2016
e6332d5
Production Version v1.0
adi-sharma Aug 28, 2016
5c33612
Production Version v1.0
adi-sharma Aug 28, 2016
6eed664
Production Version v1.0
adi-sharma Aug 28, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Made some logical changes in HashedNets
  • Loading branch information
adi-sharma authored Aug 27, 2016
commit 2bf3295db8f6e6a43dfec7207df9e1b032d681b5
84 changes: 43 additions & 41 deletions tmva/tmva/inc/TMVA/NeuralNet.icc
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ namespace TMVA
*/
template <bool HasDropOut, typename ItSource, typename ItTarget, typename ItDrop>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
int itWeight, std::vector<double>& weightBucket,
int itWeight, std::vector<double>& weightBucket, size_t layerNumber,
ItTarget itTargetBegin, ItTarget itTargetEnd,
ItDrop itDrop)
{
Expand All @@ -88,7 +88,7 @@ template <bool HasDropOut, typename ItSource, typename ItTarget, typename ItDrop
for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
{
if (!HasDropOut || *itDrop)
(*itTarget) += (*itSource) * (weightBucket[hasherFunction(itWeight) % BUCKET_SIZE]);
(*itTarget) += (*itSource) * (weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]);
++itWeight;
}
if (HasDropOut) ++itDrop;
Expand All @@ -105,14 +105,14 @@ template <bool HasDropOut, typename ItSource, typename ItTarget, typename ItDrop
* itDrop correlates with itPrev (to be in agreement with "applyWeights", where it correlates with itSource; same node as itTarget here in applyWeightsBackwards)
*/
template <bool HasDropOut, typename ItSource, typename ItPrev, typename ItDrop>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector<double>& weightBucket, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop)
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector<double>& weightBucket, size_t layerNumber, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop)
{
for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
{
for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
{
if (!HasDropOut || *itDrop)
(*itPrev) += (*itCurr) * (weightBucket[hasherFunction(itWeight) % BUCKET_SIZE]);
(*itPrev) += (*itCurr) * (weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]);
++itWeight;
}
if (HasDropOut) ++itDrop;
Expand Down Expand Up @@ -170,15 +170,15 @@ template <bool HasDropOut, typename ItSource, typename ItPrev, typename ItDrop>
void update (ItSource itSource, ItSource itSourceEnd,
ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
ItTargetGradient itTargetGradientBegin,
int itGradient, std::vector<double>& gradientBucket)
int itGradient, std::vector<double>& gradientBucket, size_t layerNumber)
{
while (itSource != itSourceEnd)
{
auto itTargetDelta = itTargetDeltaBegin;
auto itTargetGradient = itTargetGradientBegin;
while (itTargetDelta != itTargetDeltaEnd)
{
(gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
(gradientBucket[(hasherFunction(itGradient) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
++itTargetDelta; ++itTargetGradient; ++itGradient;
}
++itSource;
Expand Down Expand Up @@ -225,7 +225,7 @@ template <bool HasDropOut, typename ItSource, typename ItPrev, typename ItDrop>
ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
ItTargetGradient itTargetGradientBegin,
int itGradient, std::vector<double>& gradientBucket,
int itWeight, std::vector<double>& weightBucket, double& factorWeightDecay)
int itWeight, std::vector<double>& weightBucket, double& factorWeightDecay, size_t layerNumber)
{
// ! the factor weightDecay has to be already scaled by 1/n where n is the number of weights
while (itSource != itSourceEnd)
Expand All @@ -234,7 +234,7 @@ template <bool HasDropOut, typename ItSource, typename ItPrev, typename ItDrop>
auto itTargetGradient = itTargetGradientBegin;
while (itTargetDelta != itTargetDeltaEnd)
{
(gradientBucket[hasherFunction(itGradient) % BUCKET_SIZE]) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(weightBucket[hasherFunction(itWeight) % BUCKET_SIZE],factorWeightDecay);
(gradientBucket[(hasherFunction(itGradient) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)],factorWeightDecay);
++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
}
++itSource;
Expand Down Expand Up @@ -490,7 +490,7 @@ template <bool HasDropOut, typename ItSource, typename ItPrev, typename ItDrop>
*
*/
template <typename EnumRegularization>
double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector<double>& weightBucket, double factorWeightDecay, EnumRegularization eRegularization)
double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector<double>& weightBucket, double factorWeightDecay, EnumRegularization eRegularization, size_t layerNumber)
{
if (eRegularization == EnumRegularization::L1)
{
Expand All @@ -500,7 +500,7 @@ template <bool HasDropOut, typename ItSource, typename ItPrev, typename ItDrop>
int itWeight;
for (itWeight = currLayerWeightIndex; itWeight != nextLayerWeightIndex; ++itWeight, ++n)
{
double weight = (weightBucket[hasherFunction(itWeight) % BUCKET_SIZE]);
double weight = (weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]);
w += std::fabs (weight);
}
return error + 0.5 * w * factorWeightDecay / n;
Expand All @@ -513,7 +513,7 @@ template <bool HasDropOut, typename ItSource, typename ItPrev, typename ItDrop>
int itWeight;
for (itWeight = currLayerWeightIndex; itWeight != nextLayerWeightIndex; ++itWeight, ++n)
{
double weight = (weightBucket[hasherFunction(itWeight) % BUCKET_SIZE]);
double weight = (weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]);
w += weight*weight;
}
return error + 0.5 * w * factorWeightDecay / n;
Expand All @@ -540,20 +540,20 @@ template <bool HasDropOut, typename ItSource, typename ItPrev, typename ItDrop>
*
*/
template <typename LAYERDATA>
void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector<double>& weightBucket)
void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector<double>& weightBucket, size_t layerNumber)
{
if (prevLayerData.hasDropOut ())
{
applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
currLayerData.weightsBegin (), weightBucket,
currLayerData.weightsBegin (), weightBucket, layerNumber,
currLayerData.valuesBegin (), currLayerData.valuesEnd (),
prevLayerData.dropOut ());
}
else
{
bool dummy = true;
applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
currLayerData.weightsBegin (), weightBucket,
currLayerData.weightsBegin (), weightBucket, layerNumber,
currLayerData.valuesBegin (), currLayerData.valuesEnd (),
&dummy); // dummy to turn on all nodes (no drop out)
}
Expand All @@ -566,20 +566,20 @@ template <bool HasDropOut, typename ItSource, typename ItPrev, typename ItDrop>
*
*/
template <typename LAYERDATA>
void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector<double>& weightBucket)
void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector<double>& weightBucket, size_t layerNumber)
{
if (prevLayerData.hasDropOut ())
{
applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
currLayerData.weightsBegin (), weightBucket,
currLayerData.weightsBegin (), weightBucket, layerNumber,
prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
prevLayerData.dropOut ());
}
else
{
bool dummy = true;
applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
currLayerData.weightsBegin (), weightBucket,
currLayerData.weightsBegin (), weightBucket, layerNumber,
prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
&dummy); // dummy to use all nodes (no drop out)
}
Expand All @@ -594,7 +594,7 @@ template <typename LAYERDATA>
*
*/
template <typename LAYERDATA>
void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector<double>& weightBucket, std::vector<double>& gradientBucket)
void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector<double>& weightBucket, std::vector<double>& gradientBucket, size_t layerNumber)
{
// ! the "factorWeightDecay" has already to be scaled by 1/n where n is the number of weights
if (factorWeightDecay != 0.0) // has weight regularization
Expand All @@ -605,7 +605,7 @@ template <typename LAYERDATA>
currLayerData.deltasEnd (),
currLayerData.valueGradientsBegin (),
currLayerData.gradientsBegin (), gradientBucket,
currLayerData.weightsBegin (), weightBucket, factorWeightDecay);
currLayerData.weightsBegin (), weightBucket, factorWeightDecay, layerNumber);
}
else if (regularization == EnumRegularization::L2) // L2 regularization ( sum(w^2) )
{
Expand All @@ -614,22 +614,22 @@ template <typename LAYERDATA>
currLayerData.deltasEnd (),
currLayerData.valueGradientsBegin (),
currLayerData.gradientsBegin (), gradientBucket,
currLayerData.weightsBegin (), weightBucket, factorWeightDecay);
currLayerData.weightsBegin (), weightBucket, factorWeightDecay, layerNumber);
}
else
{
update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
currLayerData.deltasBegin (), currLayerData.deltasEnd (),
currLayerData.valueGradientsBegin (),
currLayerData.gradientsBegin (), gradientBucket);
currLayerData.gradientsBegin (), gradientBucket, layerNumber);
}

else
{ // no weight regularization
update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
currLayerData.deltasBegin (), currLayerData.deltasEnd (),
currLayerData.valueGradientsBegin (),
currLayerData.gradientsBegin (), gradientBucket);
currLayerData.gradientsBegin (), gradientBucket, layerNumber);
}
}

Expand Down Expand Up @@ -659,20 +659,22 @@ template <typename LAYERDATA>
if (drops.empty () || weightBucket.empty ())
return;

int itWeight = 0;
int itWeightEnd = (int) numWeights ();
int itWeightBucket = 0;
int itWeightBucketEnd = (int) weightBucket.size();
auto itDrop = std::begin (drops);
auto itDropEnd = std::end (drops);
size_t numNodesPrev = inputSize ();
// size_t numNodesPrev = inputSize ();
double dropFractionPrev = *itDrop;
++itDrop;

// size_t layerNumber = 0;

for (auto& layer : layers ())
{
if (itDrop == itDropEnd)
break;

size_t _numNodes = layer.numNodes ();
// size_t _numNodes = layer.numNodes ();

double dropFraction = *itDrop;
double pPrev = 1.0 - dropFractionPrev;
Expand All @@ -683,18 +685,19 @@ template <typename LAYERDATA>
{
p = 1.0/p;
}
size_t _numWeights = layer.numWeights (numNodesPrev);
for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
// size_t _numWeights = layer.numWeights (numNodesPrev);
for (size_t iWeightBucket = 0; iWeightBucket < BUCKET_SIZE; ++iWeightBucket)
{
if (itWeight == itWeightEnd)
if (itWeightBucket == itWeightBucketEnd)
break;

weightBucket[hasherFunction(itWeight) % BUCKET_SIZE] *= p;
++itWeight;
weightBucket[itWeightBucket] *= p;
++itWeightBucket;
}
numNodesPrev = _numNodes;
// numNodesPrev = _numNodes;
dropFractionPrev = dropFraction;
++itDrop;
// ++layerNumber;
}
}

Expand Down Expand Up @@ -1221,7 +1224,7 @@ template <typename LAYERDATA>
LayerData& prevLayerData = layerData.at (idxLayer);
LayerData& currLayerData = layerData.at (idxLayer+1);

forward (prevLayerData, currLayerData, weightBucket);
forward (prevLayerData, currLayerData, weightBucket, idxLayer);

applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
}
Expand Down Expand Up @@ -1261,7 +1264,7 @@ template <typename LAYERDATA>
LayerData& currLayerData = currLayerPatternData.at (idxPattern);


forward (prevLayerData, currLayerData, weightBucket); // feed forward
forward (prevLayerData, currLayerData, weightBucket, idxLayer); // feed forward
}

// ---------------- loop over layerDatas of pattern apply non-linearities ----------------------------
Expand Down Expand Up @@ -1334,17 +1337,16 @@ template <typename LAYERDATA>
double sumError (0.0);

size_t idxPattern = 0;
for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData, ++itLayerDataNext)
for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData, ++itLayerDataNext, ++idxPattern)
{
++idxPattern;

// compute E and the deltas of the computed output and the true output
LayerData& layerData = (*itLayerData);
LayerData& nextLayerData = (*itLayerDataNext);
const Pattern& _pattern = (*itPattern);
double error = errorFunction (layerData, nextLayerData, _pattern.output (),
_pattern.weight (), weightBucket, settings.factorWeightDecay (),
settings.regularization ());
settings.regularization (), idxPattern);
sumWeights += fabs (_pattern.weight ());
sumError += error;
}
Expand Down Expand Up @@ -1382,7 +1384,7 @@ template <typename LAYERDATA>
LayerData& currLayerData = (*itCurrLayerData);
LayerData& prevLayerData = *(itPrevLayerData);

backward (prevLayerData, currLayerData, weightBucket);
backward (prevLayerData, currLayerData, weightBucket, idxLayer);

// the factorWeightDecay has to be scaled by 1/n where n is the number of weights (synapses)
// because L1 and L2 regularization
Expand All @@ -1391,7 +1393,7 @@ template <typename LAYERDATA>
//
// L1 : -factorWeightDecay*sgn(w)/numWeights
// L2 : -factorWeightDecay/numWeights
update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization (), weightBucket, gradientBucket);
update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization (), weightBucket, gradientBucket, idxLayer);
}
}
}
Expand Down Expand Up @@ -1592,7 +1594,7 @@ template <typename LAYERDATA>
double patternWeight,
std::vector<double>& weightBucket,
double factorWeightDecay,
EnumRegularization eRegularization) const
EnumRegularization eRegularization, size_t layerNumber) const
{
double error (0);
switch (m_eErrorFunction)
Expand Down Expand Up @@ -1630,7 +1632,7 @@ template <typename LAYERDATA>
}
if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE)
{
error = weightDecay (error, layerData.weightsBegin (), nextLayerData.weightsBegin (), weightBucket, factorWeightDecay, eRegularization);
error = weightDecay (error, layerData.weightsBegin (), nextLayerData.weightsBegin (), weightBucket, factorWeightDecay, eRegularization, layerNumber);
}
return error;
}
Expand Down