diff --git a/tmva/tmva/inc/TMVA/MethodDNN.h b/tmva/tmva/inc/TMVA/MethodDNN.h index 31a3f11d27775..af7dc3cb2e964 100644 --- a/tmva/tmva/inc/TMVA/MethodDNN.h +++ b/tmva/tmva/inc/TMVA/MethodDNN.h @@ -1,5 +1,5 @@ // @(#)root/tmva $Id$ -// Author: Peter Speckmayer +// Authors: Peter Speckmayer, Aditya Sharma /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * @@ -10,7 +10,8 @@ * Description: * * NeuralNetwork * * * - * Authors (alphabetical): * + * Authors (alphabetical): + * Aditya Sharma - CERN, Switzerland * Peter Speckmayer - CERN, Switzerland * * * * Copyright (c) 2005-2015: * @@ -70,12 +71,10 @@ namespace TMVA { MethodDNN ( const TString& jobName, const TString& methodTitle, DataSetInfo& theData, - const TString& theOption, - TDirectory* theTargetDir = 0 ); + const TString& theOption); MethodDNN ( DataSetInfo& theData, - const TString& theWeightFile, - TDirectory* theTargetDir = 0 ); + const TString& theWeightFile ); virtual ~MethodDNN(); @@ -131,7 +130,8 @@ namespace TMVA { private: TMVA::DNN::Net fNet; - std::vector fWeights; + std::vector fWeightBucket; + int fBucketSize; TString fLayoutString; std::vector> fLayout; diff --git a/tmva/tmva/inc/TMVA/NeuralNet.h b/tmva/tmva/inc/TMVA/NeuralNet.h index 3ffaca7df35bb..aac47572fabca 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.h +++ b/tmva/tmva/inc/TMVA/NeuralNet.h @@ -1,6 +1,6 @@ /** * @file NeuralNet - * @author Peter Speckmayer + * @author Peter Speckmayer, Aditya Sharma * @version 1.0 * * @section LICENSE @@ -52,6 +52,7 @@ #include // turn on or off exceptions for NaN and other numeric exceptions +#include namespace TMVA { @@ -61,6 +62,17 @@ namespace TMVA // double gaussDoubl (edouble mean, double sigma); + // const int BUCKET_SIZE = 8; // ------------------------------- Declare Bucket Size -------------------------------------------- + /*! 
\brief Hash initialization + * + * + */ + // std::hash hasherFunction; + + + int hasherFunction(int a); + + // --------------------------------------------------------------------------------- double gaussDouble (double mean, double sigma); @@ -74,63 +86,80 @@ namespace TMVA { public: MeanVariance() - : m_n(0) - , m_sumWeights(0) - , m_sumWeightsSquared(0) - , m_mean(0) - , m_squared(0) - {} - - inline void clear() - { - m_n = 0; - m_sumWeights = 0; - m_sumWeightsSquared = 0; - } - - template - inline void add(T value, double weight = 1.0) - { - m_n++; // a value has been added - - double dValue = (double)value; - if (m_n == 1) // initialization - { - m_mean = dValue; - m_squared = 0.0; - m_sumWeightsSquared = weight*weight; - m_sumWeights = weight; - return; - } - - double tmpWeight = m_sumWeights+weight; - double diff = dValue - m_mean; - - double tmp = diff*weight/tmpWeight; - m_mean = m_mean + tmp; - m_squared = m_squared + tmpWeight*diff*tmp; - - m_sumWeights = tmpWeight; - m_sumWeightsSquared += weight*weight; - } - - - - inline int count() const { return m_n; } - inline double weights() const { if(m_n==0) return 0; return m_sumWeights; } - inline double mean() const { if(m_n==0) return 0; return m_mean; } - inline double var_N() const { if(m_n==0) return 0; return (m_squared/m_sumWeights); } - // inline double var () const { return (Variance_N()*m_n/(m_n-1)); } // unbiased for small sample sizes - inline double var () const { if(m_n==0) return 0; if(m_squared<=0) return 0.0; return (m_squared*m_sumWeights/(m_sumWeights*m_sumWeights-m_sumWeightsSquared)); } // unbiased for small sample sizes - inline double stdDev_N () const { return sqrt( var_N() ); } - inline double stdDev () const { return sqrt( var() ); } // unbiased for small sample sizes + : m_n(0) + , m_sumWeights(0) + , m_mean(0) + , m_squared(0) + {} + + inline void clear() + { + m_n = 0; + m_sumWeights = 0; + m_mean = 0; + m_squared = 0; + } + + template + inline void add(T value, double weight = 1.0) + { + ++m_n; // a value has been added + + if (m_n == 1) // initialization + { + m_mean = value; + m_squared = 0.0; + m_sumWeights = weight; + return; + } + + double tmpWeight = m_sumWeights+weight; + double Q = value - m_mean; + + double R = Q*weight/tmpWeight; + m_mean += R; + m_squared += m_sumWeights*R*Q; + + m_sumWeights = tmpWeight; + } + + template + inline void add (ITERATOR itBegin, ITERATOR itEnd) + { + for (ITERATOR it = itBegin; it != itEnd; ++it) + add (*it); + } + + + + inline int count() const { return m_n; } + inline double weights() const { if(m_n==0) return 0; return m_sumWeights; } + inline double mean() const { if(m_n==0) return 0; return m_mean; } + inline double var() const + { + if(m_n==0) + return 0; + if (m_squared <= 0) + return 0; + return (m_squared/m_sumWeights); + } + + inline double var_corr () const + { + if (m_n <= 1) + return var (); + + return (var()*m_n/(m_n-1)); // unbiased for small sample sizes + } + + inline double stdDev_corr () const { return sqrt( var_corr() ); } + inline double stdDev () const { return sqrt( var() ); } // unbiased for small sample sizes private: - size_t m_n; - double m_sumWeights; - double m_sumWeightsSquared; - double m_mean; - double m_squared; + size_t m_n; + double m_sumWeights; + double m_mean; + double m_squared; }; @@ -238,41 +267,41 @@ namespace TMVA - template - void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd); + template + void applyWeights (ItSource itSourceBegin, ItSource 
itSourceEnd, int itWeight, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE, ItTarget itTargetBegin, ItTarget itTargetEnd); - template - void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd); + template + void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop); - template - void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction); + template + void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc); - template - void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction, ItInverseFunction itInverseFunction, ItGradient itGradient); + template + void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, ItInverseFunction invFnc, ItGradient itGradient); - template + template void update (ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, - ItGradient itGradient); + int itGradient, std::vector& gradientBucket, size_t layerNumber, int BUCKET_SIZE); - template + template void update (ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, - ItGradient itGradient, - ItWeight itWeight, double weightDecay); + int itGradient, std::vector& gradientBucket, + int itWeight, std::vector& weightBucket, double& factorWeightDecay, size_t layerNumber, int BUCKET_SIZE); @@ -346,13 +375,13 @@ namespace TMVA * is not touched by the minimizer; This object is provided to the fitness function when * called */ - template - double operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough); + template + double operator() (Function& fitnessFunction, std::vector& weightBucket, PassThrough& passThrough, const size_t& numWeights, std::vector& layerWeightNumber, const int& BUCKET_SIZE); double m_alpha; ///< internal parameter (learningRate) double m_beta; ///< internal parameter (momentum) - std::vector m_prevGradients; ///< vector remembers the gradients of the previous step + std::vector m_prevGradientBucket; ///< vector remembers the gradients of the previous step }; @@ -372,26 +401,26 @@ namespace TMVA - template - double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight); + template + double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight); template - double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight); + double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight); template - double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight); + double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, 
ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight); - template - double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization); + template + double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization, size_t layerNumber, int BUCKET_SIZE); @@ -467,8 +496,8 @@ namespace TMVA * output values (mutually exclusive probability) */ LayerData (size_t size, - const_iterator_type itWeightBegin, - iterator_type itGradientBegin, + int itWeightBegin, + int itGradientBegin, std::shared_ptr> activationFunction, std::shared_ptr> inverseActivationFunction, ModeOutputValues eModeOutput = ModeOutputValues::DIRECT); @@ -486,7 +515,7 @@ namespace TMVA * output value (to create a probability); SOFTMAX applies a softmax transformation to all * output values (mutually exclusive probability) */ - LayerData (size_t size, const_iterator_type itWeightBegin, + LayerData (size_t size, int itWeightBegin, std::shared_ptr> activationFunction, ModeOutputValues eModeOutput = ModeOutputValues::DIRECT); @@ -501,7 +530,8 @@ namespace TMVA , m_deltas (other.m_deltas) , m_valueGradients (other.m_valueGradients) , m_values (other.m_values) - , m_hasDropOut (false) + , m_itDropOut (other.m_itDropOut) + , m_hasDropOut (other.m_hasDropOut) , m_itConstWeightBegin (other.m_itConstWeightBegin) , m_itGradientBegin (other.m_itGradientBegin) , m_activationFunction (other.m_activationFunction) @@ -520,14 +550,15 @@ namespace TMVA : m_size (other.m_size) , m_itInputBegin (other.m_itInputBegin) , m_itInputEnd (other.m_itInputEnd) - , m_deltas (other.m_deltas) - , m_valueGradients (other.m_valueGradients) - , m_values (other.m_values) - , m_hasDropOut (false) + , m_deltas (std::move(other.m_deltas)) + , m_valueGradients (std::move(other.m_valueGradients)) + , m_values (std::move(other.m_values)) + , m_itDropOut (other.m_itDropOut) + , m_hasDropOut (other.m_hasDropOut) , m_itConstWeightBegin (other.m_itConstWeightBegin) , m_itGradientBegin (other.m_itGradientBegin) - , m_activationFunction (other.m_activationFunction) - , m_inverseActivationFunction (other.m_inverseActivationFunction) + , m_activationFunction (std::move(other.m_activationFunction)) + , m_inverseActivationFunction (std::move(other.m_inverseActivationFunction)) , m_isInputLayer (other.m_isInputLayer) , m_hasWeights (other.m_hasWeights) , m_hasGradients (other.m_hasGradients) @@ -566,7 +597,7 @@ namespace TMVA iterator_type valuesEnd () { assert (!m_isInputLayer); return end (m_values); } ///< returns iterator to the end of the (node) values ModeOutputValues outputMode () const { return m_eModeOutput; } ///< returns the output mode - container_type probabilities () { return computeProbabilities (); } ///< computes the probabilities from the current node values and returns them + container_type probabilities () const { return computeProbabilities (); } ///< computes the probabilities from the current node values and returns them iterator_type deltasBegin () { return begin (m_deltas); } ///< returns iterator to the begin of the deltas (back-propagation) iterator_type deltasEnd () { return end (m_deltas); } ///< returns iterator to the end of the deltas (back-propagation) @@ -580,9 +611,10 @@ namespace TMVA const_iterator_type valueGradientsBegin () const { return begin (m_valueGradients); } ///< returns const 
iterator to the begin of the gradients const_iterator_type valueGradientsEnd () const { return end (m_valueGradients); } ///< returns const iterator to the end of the gradients - iterator_type gradientsBegin () { assert (m_hasGradients); return m_itGradientBegin; } ///< returns iterator to the begin of the gradients - const_iterator_type gradientsBegin () const { assert (m_hasGradients); return m_itGradientBegin; } ///< returns const iterator to the begin of the gradients - const_iterator_type weightsBegin () const { assert (m_hasWeights); return m_itConstWeightBegin; } ///< returns const iterator to the begin of the weights for this layer + int gradientsBegin () { assert (m_hasGradients); return m_itGradientBegin; } ///< returns iterator to the begin of the gradients + int gradientsBegin () const { assert (m_hasGradients); return m_itGradientBegin; } ///< returns const iterator to the begin of the gradients + int weightsBegin () const { assert (m_hasWeights); return m_itConstWeightBegin; } ///< returns const iterator to the begin of the weights for this layer + std::shared_ptr> activationFunction () const { return m_activationFunction; } std::shared_ptr> inverseActivationFunction () const { return m_inverseActivationFunction; } @@ -600,7 +632,7 @@ namespace TMVA void clearDropOut () { m_hasDropOut = false; } bool hasDropOut () const { return m_hasDropOut; } ///< has this layer drop-out turned on? - const_dropout_iterator dropOut () const { return m_itDropOut; } ///< return the begin of the drop-out information + const_dropout_iterator dropOut () const { assert (m_hasDropOut); return m_itDropOut; } ///< return the begin of the drop-out information size_t size () const { return m_size; } ///< return the size of the layer @@ -610,7 +642,7 @@ namespace TMVA * * */ - container_type computeProbabilities (); + container_type computeProbabilities () const; private: @@ -620,13 +652,14 @@ namespace TMVA const_iterator_type m_itInputEnd; ///< iterator to the end of the nodes in the input node vector std::vector m_deltas; ///< stores the deltas for the DNN training - std::vector m_valueGradients; ///< stores the gradients of the values (nodes) + std::vector m_valueGradients; ///< stores the gradients of the values (nodes) + std::vector m_values; ///< stores the values of the nodes in this layer const_dropout_iterator m_itDropOut; ///< iterator to a container indicating if the corresponding node is to be dropped bool m_hasDropOut; ///< dropOut is turned on? 
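// --- Reviewer note (illustrative sketch, not part of the patch): weightsBegin()/gradientsBegin()
// now return plain integer offsets into the *logical* weight space instead of iterators.
// Wherever a weight is actually read or written (applyWeights, applyWeightsBackwards, update, ...)
// that logical index is folded into one of BUCKET_SIZE shared slots owned by the layer:
//
//    weightBucket[ (hasherFunction(logicalIndex) % BUCKET_SIZE) + layerNumber * BUCKET_SIZE ]
//
//    // hypothetical helper equivalent to the expression inlined throughout the patch:
//    inline size_t bucketIndex (int logicalIndex, size_t layerNumber, int BUCKET_SIZE)
//    { return (hasherFunction (logicalIndex) % BUCKET_SIZE) + layerNumber * BUCKET_SIZE; }
//
// so a layer with numNodesPrev*numNodes logical connections stores and trains only BUCKET_SIZE
// distinct values (hashing-trick style weight sharing). "logicalIndex" and "bucketIndex" are
// descriptive names used only here; the code itself works with the running itWeight/itGradient counters.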
- const_iterator_type m_itConstWeightBegin; ///< const iterator to the first weight of this layer in the weight vector - iterator_type m_itGradientBegin; ///< const iterator to the first gradient of this layer in the gradient vector + int m_itConstWeightBegin; ///< const iterator to the first weight of this layer in the weight vector + int m_itGradientBegin; ///< iterator to the first gradient of this layer in the gradient vector std::shared_ptr> m_activationFunction; ///< activation function for this layer std::shared_ptr> m_inverseActivationFunction; ///< inverse activation function for this layer @@ -693,18 +726,15 @@ namespace TMVA template - void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData); - - template - void forward_training (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData); + void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE); template - void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData); + void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE); template - void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double weightDecay, EnumRegularization regularization); + void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket, size_t layerNumber, int BUCKET_SIZE); @@ -722,12 +752,11 @@ namespace TMVA */ Settings (TString name, size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7, - double _factorWeightDecay = 1e-5, TMVA::DNN::EnumRegularization _regularization = TMVA::DNN::EnumRegularization::NONE, + double _factorWeightDecay = 1e-5, int _bucketSize = 8, TMVA::DNN::EnumRegularization _regularization = TMVA::DNN::EnumRegularization::NONE, MinimizerType _eMinimizerType = MinimizerType::fSteepest, double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3, - bool _multithreading = true, - bool _doBatchNormalization = true); + bool _multithreading = true); /*! \brief d'tor * @@ -759,6 +788,7 @@ namespace TMVA double momentum () const { return fMomentum; } ///< get the momentum (e.g. for SGD) int repetitions () const { return fRepetitions; } ///< how many steps have to be gone until the batch is changed MinimizerType minimizerType () const { return fMinimizerType; } ///< which minimizer shall be used (e.g. SGD) + int bucketSize () const { return fBucketSize; } ///< Number of Weight Buckets per Layer @@ -800,7 +830,6 @@ namespace TMVA EnumRegularization regularization () const { return m_regularization; } ///< some regularization of the DNN is turned on? bool useMultithreading () const { return m_useMultithreading; } ///< is multithreading turned on? 
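// --- Reviewer note (illustrative sketch, not part of the patch): the Settings constructor gains an
// int _bucketSize = 8 argument directly after _factorWeightDecay and drops the trailing
// _doBatchNormalization flag, so positional call sites need updating, e.g.
//
//    TMVA::DNN::Settings settings ("DNN",
//                                  /*convergenceSteps*/ 15, /*batchSize*/ 10, /*testRepetitions*/ 7,
//                                  /*factorWeightDecay*/ 1e-5,
//                                  /*bucketSize*/        8,   // shared weight slots per layer
//                                  TMVA::DNN::EnumRegularization::NONE);
//
// (values shown are just the declared defaults; Net::train later picks the value up via
// settings.bucketSize() and stores it in m_bucketSize).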
- bool doBatchNormalization () const { return m_doBatchNormalization; } void pads (int numPads) { if (fMonitoring) fMonitoring->pads (numPads); } ///< preparation for monitoring @@ -841,6 +870,7 @@ namespace TMVA double fMomentum; int fRepetitions; MinimizerType fMinimizerType; + int fBucketSize; size_t m_convergenceCount; size_t m_maxConvergenceCount; @@ -849,7 +879,6 @@ namespace TMVA protected: bool m_useMultithreading; - bool m_doBatchNormalization; std::shared_ptr fMonitoring; }; @@ -889,13 +918,12 @@ namespace TMVA */ ClassificationSettings (TString name, size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7, - double _factorWeightDecay = 1e-5, EnumRegularization _regularization = EnumRegularization::NONE, + double _factorWeightDecay = 1e-5, int _bucketSize = 8, EnumRegularization _regularization = EnumRegularization::NONE, size_t _scaleToNumEvents = 0, MinimizerType _eMinimizerType = MinimizerType::fSteepest, double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3, - bool _useMultithreading = true, - bool _useBatchNormalization = true) - : Settings (name, _convergenceSteps, _batchSize, _testRepetitions, _factorWeightDecay, - _regularization, _eMinimizerType, _learningRate, _momentum, _repetitions, _useMultithreading, _useBatchNormalization) + bool _useMultithreading = true) + : Settings (name, _convergenceSteps, _batchSize, _testRepetitions, _factorWeightDecay, _bucketSize, + _regularization, _eMinimizerType, _learningRate, _momentum, _repetitions, _useMultithreading) , m_ams () , m_sumOfSigWeights (0) , m_sumOfBkgWeights (0) @@ -1065,6 +1093,7 @@ namespace TMVA : m_eErrorFunction (ModeErrorFunction::SUMOFSQUARES) , m_sizeInput (0) , m_layers () + , m_bucketSize (8) { } @@ -1076,6 +1105,7 @@ namespace TMVA : m_eErrorFunction (other.m_eErrorFunction) , m_sizeInput (other.m_sizeInput) , m_layers (other.m_layers) + , m_bucketSize (other.m_bucketSize) { } @@ -1092,8 +1122,8 @@ namespace TMVA * * */ - template - void dropOutWeightFactor (WeightsType& weights, + template + void dropOutWeightFactor (std::vector& weightBucket, const DropProbabilities& drops, bool inverse = false); @@ -1106,10 +1136,11 @@ namespace TMVA * \param settings settings used for this training run */ template - double train (std::vector& weights, + double train (std::vector& weightBucket, std::vector& layerWeightNumber, std::vector& trainPattern, const std::vector& testPattern, - Minimizer& minimizer, Settings& settings); + Minimizer& minimizer, + Settings& settings); /*! 
\brief pre-training for future use * @@ -1132,39 +1163,84 @@ namespace TMVA * \param dropContainer the configuration for DNN drop-out */ template - inline double trainCycle (Minimizer& minimizer, std::vector& weights, - Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer); + double trainCycle (Minimizer& minimizer, std::vector& weightBucket, std::vector& layerWeightNumber, + Iterator itPatternBegin, Iterator itPatternEnd, + Settings& settings, + DropContainer& dropContainer); + + template + void forwardPattern (const LayerContainer& _layers, + std::vector& layerData, std::vector& weightBucket, int BUCKET_SIZE) const; size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net + size_t numNodes (size_t trainingStartLayer = 0) const; ///< returns the number of nodes in this net + + template + std::vector compute (const std::vector& input, Weights& weightBucket, int BUCKET_SIZE) const; ///< compute the net with the given input and the given weights - template - std::vector compute (const std::vector& input, const Weights& weights) const; ///< compute the net with the given input and the given weights + template + double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients + + template + double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, ModeOutput /*eFetch*/, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function + + template + double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well + + template + double operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket, ModeOutput eFetch, OutContainer& outputContainer) const; + + + template + std::vector> prepareLayerData (LayerContainer& layers, + Batch& batch, + const DropContainer& dropContainer, + int itWeightBegin, + int itWeightEnd, + int itGradientBegin, + int itGradientEnd, + size_t& totalNumWeights) const; - template - double operator() (PassThrough& settingsAndBatch, const Weights& weights) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients - template - double operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput eFetch, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function + + + template + void forwardBatch (const LayerContainer& _layers, + LayerPatternContainer& layerPatternData, + std::vector& valuesMean, + std::vector& valuesStdDev, + size_t trainFromLayer, std::vector& weightBucket) const; - template - double operator() (PassThrough& settingsAndBatch, const Weights& weights, Gradients& gradients) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well + template + void fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const; - template - double operator() (PassThrough& settingsAndBatch, const Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const; + template + void fetchOutput (const std::vector& layerPatternData, OutputContainer& outputContainer) const; + template + 
std::tuple computeError (const Settings& settings, + std::vector& lastLayerData, + Batch& batch, Weights& weightBucket) const; + template + void backPropagate (std::vector>& layerPatternData, std::vector& weightBucket, std::vector& gradientBucket, + const Settings& settings, + size_t trainFromLayer, + size_t totalNumWeights) const; - /*! \brief main DNN computation function + + + /*! \brief main NN computation function * * */ - template + template double forward_backward (LayerContainer& layers, PassThrough& settingsAndBatch, - ItWeight itWeightBegin, - ItGradient itGradientBegin, ItGradient itGradientEnd, + int itWeightBegin, int itWeightEnd, + int itGradientBegin, int itGradientEnd, size_t trainFromLayer, - OutContainer& outputContainer, bool fetchOutput) const; + OutContainer& outputContainer, bool fetchOutput, std::vector& weightBucket, std::vector& gradientBucket) const; @@ -1176,14 +1252,14 @@ namespace TMVA * * */ - template + template double errorFunction (LayerData& layerData, + LayerData& nextLayerData, Container truth, - ItWeight itWeight, - ItWeight itWeightEnd, double patternWeight, + std::vector& weightBucket, double factorWeightDecay, - EnumRegularization eRegularization) const; + EnumRegularization eRegularization, size_t layerNumber) const; const std::vector& layers () const { return m_layers; } ///< returns the layers (structure) @@ -1201,7 +1277,8 @@ namespace TMVA template void initializeWeights (WeightInitializationStrategy eInitStrategy, - OutIterator itWeight); ///< initialize the weights with the given strategy + OutIterator itWeight, std::vector& layerWeightNumber, int BUCKET_SIZE); ///< initialize the weights with the given strategy + protected: @@ -1214,11 +1291,13 @@ namespace TMVA size_t m_sizeInput; ///< input size of this DNN size_t m_sizeOutput; ///< outut size of this DNN std::vector m_layers; ///< layer-structure-data + int m_bucketSize; }; +typedef std::tuple pass_through_type; diff --git a/tmva/tmva/inc/TMVA/NeuralNet.icc b/tmva/tmva/inc/TMVA/NeuralNet.icc index 3ef36f1cf6eb5..ae0e7c884b493 100644 --- a/tmva/tmva/inc/TMVA/NeuralNet.icc +++ b/tmva/tmva/inc/TMVA/NeuralNet.icc @@ -8,6 +8,8 @@ #include "Math/Util.h" +#include + namespace TMVA { @@ -17,10 +19,6 @@ namespace TMVA - - - - template T uniformFromTo (T from, T to) { @@ -75,13 +73,13 @@ namespace TMVA -/*! \brief apply weights using drop-out +/*! \brief apply weights using drop-out; for no drop out, provide (&bool = true) to itDrop such that *itDrop becomes "true" * * itDrop correlates with itSourceBegin */ - template +template void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, - ItWeight itWeight, + int itWeight, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE, ItTarget itTargetBegin, ItTarget itTargetEnd, ItDrop itDrop) { @@ -89,81 +87,40 @@ namespace TMVA { for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget) { - if (*itDrop) - (*itTarget) += (*itSource) * (*itWeight); + if (!HasDropOut || *itDrop) + (*itTarget) += (*itSource) * (weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]); ++itWeight; } - ++itDrop; + if (HasDropOut) ++itDrop; } } -/*! 
\brief apply weights without drop-out - * - * - */ - template - void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, - ItWeight itWeight, - ItTarget itTargetBegin, ItTarget itTargetEnd) - { - for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource) - { - for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget) - { - (*itTarget) += (*itSource) * (*itWeight); - ++itWeight; - } - } - } - -/*! \brief apply weights backwards (for backprop) +/*! \brief apply weights backwards (for backprop); for no drop out, provide (&bool = true) to itDrop such that *itDrop becomes "true" * - * + * itDrop correlates with itPrev (to be in agreement with "applyWeights" where it correlates with itSources (same node as itTarget here in applyBackwards) */ - template - void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, - ItWeight itWeight, - ItPrev itPrevBegin, ItPrev itPrevEnd) +template + void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, int itWeight, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE, ItPrev itPrevBegin, ItPrev itPrevEnd, ItDrop itDrop) { for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev) { for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr) { - (*itPrev) += (*itCurr) * (*itWeight); + if (!HasDropOut || *itDrop) + (*itPrev) += (*itCurr) * (weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]); ++itWeight; } + if (HasDropOut) ++itDrop; } } -/*! \brief apply weights backwards (for backprop) - * - * itDrop correlates with itPrev (to be in agreement with "applyWeights" where it correlates with itSources (same node as itTarget here in applyBackwards) - */ - template - void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, - ItWeight itWeight, - ItPrev itPrevBegin, ItPrev itPrevEnd, - ItDrop itDrop) - { - for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev) - { - for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr) - { - if (*itDrop) - (*itPrev) += (*itCurr) * (*itWeight); - ++itWeight; - } - ++itDrop; - } - } - @@ -190,8 +147,8 @@ namespace TMVA * * */ - template - void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient) + template + void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, ItInverseFunction invFnc, ItGradient itGradient) { while (itValue != itValueEnd) { @@ -209,11 +166,11 @@ namespace TMVA * * */ - template + template void update (ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, - ItGradient itGradient) + int itGradient, std::vector& gradientBucket, size_t layerNumber, int BUCKET_SIZE) { while (itSource != itSourceEnd) { @@ -221,7 +178,7 @@ namespace TMVA auto itTargetGradient = itTargetGradientBegin; while (itTargetDelta != itTargetDeltaEnd) { - (*itGradient) += - (*itTargetDelta) * (*itSource) * (*itTargetGradient); + (gradientBucket[(hasherFunction(itGradient) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient); ++itTargetDelta; ++itTargetGradient; ++itGradient; } ++itSource; @@ -263,12 +220,12 @@ namespace TMVA * * */ - template - void update (ItSource itSource, ItSource itSourceEnd, - ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, - ItTargetGradient itTargetGradientBegin, - ItGradient itGradient, - ItWeight itWeight, double weightDecay) + template + void update (ItSource itSource, ItSource 
itSourceEnd, + ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, + ItTargetGradient itTargetGradientBegin, + int itGradient, std::vector& gradientBucket, + int itWeight, std::vector& weightBucket, double& factorWeightDecay, size_t layerNumber, int BUCKET_SIZE) { // ! the factor weightDecay has to be already scaled by 1/n where n is the number of weights while (itSource != itSourceEnd) @@ -277,7 +234,7 @@ namespace TMVA auto itTargetGradient = itTargetGradientBegin; while (itTargetDelta != itTargetDeltaEnd) { - (*itGradient) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization(*itWeight,weightDecay); + (gradientBucket[(hasherFunction(itGradient) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization(weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)],factorWeightDecay); ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight; } ++itSource; @@ -288,7 +245,6 @@ namespace TMVA - #define USELOCALWEIGHTS 1 @@ -297,18 +253,18 @@ namespace TMVA * * Can be used with multithreading (i.e. "HogWild!" style); see call in trainCycle */ - template - double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough) + template + double Steepest::operator() (Function& fitnessFunction, std::vector& weightBucket, PassThrough& passThrough, const size_t& numWeights, std::vector& layerWeightNumber, const int& BUCKET_SIZE) { - size_t numWeights = weights.size (); - std::vector gradients (numWeights, 0.0); - std::vector localWeights (begin (weights), end (weights)); + // std::vector gradients (numWeights, 0.0); + std::vector gradientBucket (weightBucket.size (), 0.0); + std::vector localWeightBucket (begin (weightBucket), end (weightBucket)); double E = 1e10; - if (m_prevGradients.size () != numWeights) + if (m_prevGradientBucket.size () != weightBucket.size ()) { - m_prevGradients.clear (); - m_prevGradients.assign (weights.size (), 0); + m_prevGradientBucket.clear (); + m_prevGradientBucket.assign (weightBucket.size (), 0); } bool success = true; @@ -318,58 +274,71 @@ namespace TMVA if (currentRepetition >= m_repetitions) break; - gradients.assign (numWeights, 0.0); + gradientBucket.assign (weightBucket.size (), 0.0); // --- nesterov momentum --- // apply momentum before computing the new gradient - auto itPrevG = begin (m_prevGradients); - auto itPrevGEnd = end (m_prevGradients); - auto itLocWeight = begin (localWeights); - for (; itPrevG != itPrevGEnd; ++itPrevG) + int itPrevG = 0; + int itPrevGEnd = numWeights; + int itLocWeight = 0; + int itLWN, layerNumber = 0; + + for (auto itLayerWeightNumber = layerWeightNumber.begin(); itLayerWeightNumber != layerWeightNumber.end(); ++itLayerWeightNumber, ++layerNumber) { - (*itPrevG) *= m_beta; - (*itLocWeight) += (*itPrevG); + for(itLWN = 0; itLWN < *itLayerWeightNumber; ++itLWN) + { + (m_prevGradientBucket[(hasherFunction(itPrevG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) *= m_beta; + (localWeightBucket[(hasherFunction(itLocWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) += (m_prevGradientBucket[(hasherFunction(itPrevG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]); + ++itPrevG; ++itLocWeight; + } } - E = fitnessFunction (passThrough, localWeights, gradients); + E = fitnessFunction (passThrough, localWeightBucket, gradientBucket); // ************************** // plotGradients (gradients); +// plotWeights (localWeightBucket); double alpha = gaussDouble (m_alpha, m_alpha/2.0); 
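// --- Reviewer note (illustrative sketch, not part of the patch): the loops above/below walk the
// *logical* weights of every layer (counts taken from layerWeightNumber) but read and write the
// shared buckets, so with b = (hasherFunction(i) % BUCKET_SIZE) + layerNumber * BUCKET_SIZE the
// Nesterov-style step per logical weight i is
//
//    p[b] *= beta;   w_local[b] += p[b];           // look-ahead weights fed to fitnessFunction (above)
//    g[b]  = alpha * g[b] + p[b];   p[b] = g[b];   // momentum-accumulated gradient (loop below)
//    w[b] += g[b];                                 // applied at the end (or w rescaled if maxGrad > 1)
//
// where g = gradientBucket, p = m_prevGradientBucket, w = weightBucket, w_local = localWeightBucket.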
// double alpha = m_alpha; - auto itG = begin (gradients); - auto itGEnd = end (gradients); - itPrevG = begin (m_prevGradients); + int itG = 0; + int itGEnd = numWeights; + itPrevG = 0; double maxGrad = 0.0; - for (; itG != itGEnd; ++itG, ++itPrevG) + layerNumber = 0; + + for (auto itLayerWeightNumber = layerWeightNumber.begin(); itLayerWeightNumber != layerWeightNumber.end(); ++itLayerWeightNumber, ++layerNumber) { - double currGrad = (*itG); - double prevGrad = (*itPrevG); - currGrad *= alpha; - - //(*itPrevG) = m_beta * (prevGrad + currGrad); - currGrad += prevGrad; - (*itG) = currGrad; - (*itPrevG) = currGrad; + for(itLWN = 0; itLWN < *itLayerWeightNumber; ++itLWN) + { + double currGrad = (gradientBucket[(hasherFunction(itG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]); + double prevGrad = (m_prevGradientBucket[(hasherFunction(itPrevG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]); + currGrad *= alpha; - if (std::fabs (currGrad) > maxGrad) - maxGrad = currGrad; + //(*itPrevG) = m_beta * (prevGrad + currGrad); + currGrad += prevGrad; + (gradientBucket[(hasherFunction(itG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) = currGrad; + (m_prevGradientBucket[(hasherFunction(itPrevG) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]) = currGrad; + + if (std::fabs (currGrad) > maxGrad) + maxGrad = currGrad; + ++itG; ++itPrevG; + } } if (maxGrad > 1) { m_alpha /= 2; std::cout << "\nlearning rate reduced to " << m_alpha << std::endl; - std::for_each (weights.begin (), weights.end (), [maxGrad](double& w) + std::for_each (weightBucket.begin (), weightBucket.end (), [maxGrad](double& w) { w /= maxGrad; }); - m_prevGradients.clear (); + m_prevGradientBucket.clear (); } else { - auto itW = std::begin (weights); - std::for_each (std::begin (gradients), std::end (gradients), [&itW](double& g) + auto itW = std::begin (weightBucket); + std::for_each (gradientBucket.begin (), gradientBucket.end (), [&itW](double& g) { *itW += g; ++itW; @@ -398,6 +367,8 @@ namespace TMVA + + /*! 
\brief sum of squares error function * * @@ -412,7 +383,7 @@ namespace TMVA bool hasDeltas = (itDelta != itDeltaEnd); for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth) { -// assert (itTruth != itTruthEnd); +// assert (itTruth != itTruthEnd); double output = (*itOutput); double error = output - (*itTruth); if (hasDeltas) @@ -449,7 +420,7 @@ namespace TMVA { double delta = probability - truth; (*itDelta) = delta*patternWeight; -// (*itDelta) = (*itInvActFnc)(probability) * delta * patternWeight; +// (*itDelta) = (*itInvActFnc)(probability) * delta * patternWeight; ++itDelta; } double error (0); @@ -488,13 +459,13 @@ namespace TMVA ItTruth itTruth = itTruthBegin; for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth) { -// assert (itTruth != itTruthEnd); +// assert (itTruth != itTruthEnd); double probability = (*itProbability); double truth = (*itTruth); if (hasDeltas) { (*itDelta) = probability - truth; -// (*itDelta) = (*itInvActFnc)(sm) * delta * patternWeight; +// (*itDelta) = (*itInvActFnc)(sm) * delta * patternWeight; ++itDelta; //++itInvActFnc; } double error (0); @@ -518,17 +489,18 @@ namespace TMVA * * */ - template - double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization) + template + double weightDecay (double error, int currLayerWeightIndex, int nextLayerWeightIndex, std::vector& weightBucket, double factorWeightDecay, EnumRegularization eRegularization, size_t layerNumber, int BUCKET_SIZE) { if (eRegularization == EnumRegularization::L1) { // weight decay (regularization) double w = 0; size_t n = 0; - for (; itWeight != itWeightEnd; ++itWeight, ++n) + int itWeight; + for (itWeight = currLayerWeightIndex; itWeight != nextLayerWeightIndex; ++itWeight, ++n) { - double weight = (*itWeight); + double weight = (weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]); w += std::fabs (weight); } return error + 0.5 * w * factorWeightDecay / n; @@ -538,9 +510,10 @@ namespace TMVA // weight decay (regularization) double w = 0; size_t n = 0; - for (; itWeight != itWeightEnd; ++itWeight, ++n) + int itWeight; + for (itWeight = currLayerWeightIndex; itWeight != nextLayerWeightIndex; ++itWeight, ++n) { - double weight = (*itWeight); + double weight = (weightBucket[(hasherFunction(itWeight) % BUCKET_SIZE) + (layerNumber * BUCKET_SIZE)]); w += weight*weight; } return error + 0.5 * w * factorWeightDecay / n; @@ -562,72 +535,57 @@ namespace TMVA -/*! \brief apply the weights in forward direction of the DNN +/*! 
\brief apply the weights (and functions) in forward direction of the DNN * * */ template - void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData) + void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE) { if (prevLayerData.hasDropOut ()) { - applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.weightsBegin (), + applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), + currLayerData.weightsBegin (), weightBucket, layerNumber, BUCKET_SIZE, currLayerData.valuesBegin (), currLayerData.valuesEnd (), prevLayerData.dropOut ()); } else { - applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.weightsBegin (), - currLayerData.valuesBegin (), currLayerData.valuesEnd ()); + bool dummy = true; + applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), + currLayerData.weightsBegin (), weightBucket, layerNumber, BUCKET_SIZE, + currLayerData.valuesBegin (), currLayerData.valuesEnd (), + &dummy); // dummy to turn on all nodes (no drop out) } } -/*! \brief apply weights (and functions) in forward direction and compute the gradients - * - * - */ - template - void forward_training (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData) - { - if (prevLayerData.hasDropOut ()) - { - applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.weightsBegin (), - currLayerData.valuesBegin (), currLayerData.valuesEnd (), - prevLayerData.dropOut ()); - } - else - { - applyWeights (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.weightsBegin (), - currLayerData.valuesBegin (), currLayerData.valuesEnd ()); - } - } /*! \brief backward application of the weights (back-propagation of the error) * * */ - template - void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData) - { - if (prevLayerData.hasDropOut ()) - { - applyWeightsBackwards (currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.weightsBegin (), - prevLayerData.deltasBegin (), prevLayerData.deltasEnd (), - prevLayerData.dropOut ()); - } - else - { - applyWeightsBackwards (currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.weightsBegin (), - prevLayerData.deltasBegin (), prevLayerData.deltasEnd ()); - } - } +template + void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData, std::vector& weightBucket, size_t layerNumber, int BUCKET_SIZE) +{ + if (prevLayerData.hasDropOut ()) + { + applyWeightsBackwards (currLayerData.deltasBegin (), currLayerData.deltasEnd (), + currLayerData.weightsBegin (), weightBucket, layerNumber, BUCKET_SIZE, + prevLayerData.deltasBegin (), prevLayerData.deltasEnd (), + prevLayerData.dropOut ()); + } + else + { + bool dummy = true; + applyWeightsBackwards (currLayerData.deltasBegin (), currLayerData.deltasEnd (), + currLayerData.weightsBegin (), weightBucket, layerNumber, BUCKET_SIZE, + prevLayerData.deltasBegin (), prevLayerData.deltasEnd (), + &dummy); // dummy to use all nodes (no drop out) + } +} + + @@ -636,36 +594,42 @@ namespace TMVA * */ template - void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization) + void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization, std::vector& weightBucket, std::vector& gradientBucket, size_t layerNumber, int BUCKET_SIZE) { // ! 
the "factorWeightDecay" has already to be scaled by 1/n where n is the number of weights if (factorWeightDecay != 0.0) // has weight regularization if (regularization == EnumRegularization::L1) // L1 regularization ( sum(|w|) ) { - update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (), - currLayerData.weightsBegin (), factorWeightDecay); + update (prevLayerData.valuesBegin (),prevLayerData.valuesEnd (), + currLayerData.deltasBegin (), + currLayerData.deltasEnd (), + currLayerData.valueGradientsBegin (), + currLayerData.gradientsBegin (), gradientBucket, + currLayerData.weightsBegin (), weightBucket, factorWeightDecay, layerNumber, BUCKET_SIZE); } else if (regularization == EnumRegularization::L2) // L2 regularization ( sum(w^2) ) { - update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (), - currLayerData.weightsBegin (), factorWeightDecay); + update (prevLayerData.valuesBegin (),prevLayerData.valuesEnd (), + currLayerData.deltasBegin (), + currLayerData.deltasEnd (), + currLayerData.valueGradientsBegin (), + currLayerData.gradientsBegin (), gradientBucket, + currLayerData.weightsBegin (), weightBucket, factorWeightDecay, layerNumber, BUCKET_SIZE); } else { update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ()); + currLayerData.valueGradientsBegin (), + currLayerData.gradientsBegin (), gradientBucket, layerNumber, BUCKET_SIZE); } else { // no weight regularization update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (), - currLayerData.deltasBegin (), currLayerData.deltasEnd (), - currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ()); + currLayerData.deltasBegin (), currLayerData.deltasEnd (), + currLayerData.valueGradientsBegin (), + currLayerData.gradientsBegin (), gradientBucket, layerNumber, BUCKET_SIZE); } } @@ -687,28 +651,30 @@ namespace TMVA * the weights have to be adjusted to account for the different number of active nodes * this function computes the factor and applies it to the weights */ - template - void Net::dropOutWeightFactor (WeightsType& weights, + template + void Net::dropOutWeightFactor (std::vector& weightBucket, const DropProbabilities& drops, bool inverse) { - if (drops.empty () || weights.empty ()) + if (drops.empty () || weightBucket.empty ()) return; - auto itWeight = std::begin (weights); - auto itWeightEnd = std::end (weights); + int itWeightBucket = 0; + int itWeightBucketEnd = (int) weightBucket.size(); auto itDrop = std::begin (drops); auto itDropEnd = std::end (drops); - size_t numNodesPrev = inputSize (); + // size_t numNodesPrev = inputSize (); double dropFractionPrev = *itDrop; ++itDrop; + // size_t layerNumber = 0; + for (auto& layer : layers ()) { if (itDrop == itDropEnd) break; - size_t numNodes = layer.numNodes (); + // size_t _numNodes = layer.numNodes (); double dropFraction = *itDrop; double pPrev = 1.0 - dropFractionPrev; @@ -719,18 +685,19 @@ namespace TMVA { p = 1.0/p; } - size_t _numWeights = layer.numWeights (numNodesPrev); - for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight) + // size_t _numWeights = layer.numWeights (numNodesPrev); + for (size_t iWeightBucket = 0; iWeightBucket < 
m_bucketSize; ++iWeightBucket) { - if (itWeight == itWeightEnd) + if (itWeightBucket == itWeightBucketEnd) break; - *itWeight *= p; - ++itWeight; + weightBucket[itWeightBucket] *= p; + ++itWeightBucket; } - numNodesPrev = numNodes; + // numNodesPrev = _numNodes; dropFractionPrev = dropFraction; ++itDrop; + // ++layerNumber; } } @@ -748,10 +715,11 @@ namespace TMVA * \param settings the settings for the training (e.g. multithreading or not, regularization etc.) */ template - double Net::train (std::vector& weights, + double Net::train (std::vector& weightBucket, std::vector& layerWeightNumber, std::vector& trainPattern, const std::vector& testPattern, - Minimizer& minimizer, Settings& settings) + Minimizer& minimizer, + Settings& settings) { // std::cout << "START TRAINING" << std::endl; settings.startTrainCycle (); @@ -760,6 +728,8 @@ namespace TMVA settings.create ("trainErrors", 100, 0, 100, 100, 0,1); settings.create ("testErrors", 100, 0, 100, 100, 0,1); + m_bucketSize = settings.bucketSize (); + size_t cycleCount = 0; size_t testCycleCount = 0; double testError = 1e20; @@ -771,7 +741,6 @@ namespace TMVA const std::vector& dropFractions = settings.dropFractions (); bool isWeightsForDrop = false; - // until convergence do { @@ -783,39 +752,40 @@ namespace TMVA { // fill the dropOut-container dropContainer.clear (); - size_t numNodes = inputSize (); + size_t _numNodes = inputSize (); double dropFraction = 0.0; dropFraction = dropFractions.at (dropIndex); ++dropIndex; - fillDropContainer (dropContainer, dropFraction, numNodes); + fillDropContainer (dropContainer, dropFraction, _numNodes); for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex) { auto& layer = *itLayer; - numNodes = layer.numNodes (); + _numNodes = layer.numNodes (); // how many nodes have to be dropped dropFraction = 0.0; if (dropFractions.size () > dropIndex) dropFraction = dropFractions.at (dropIndex); - fillDropContainer (dropContainer, dropFraction, numNodes); + fillDropContainer (dropContainer, dropFraction, _numNodes); } isWeightsForDrop = true; } // execute training cycle - trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer); + trainError = trainCycle (minimizer, weightBucket, layerWeightNumber, begin (trainPattern), end (trainPattern), settings, dropContainer); - // check if we execute a test + // ------ check if we have to execute a test ------------------ bool hasConverged = false; - if (testCycleCount % settings.testRepetitions () == 0) + if (testCycleCount % settings.testRepetitions () == 0) // we test only everye "testRepetitions" repetition { if (isWeightsForDrop) { - dropOutWeightFactor (weights, dropFractions); + dropOutWeightFactor (weightBucket, dropFractions); isWeightsForDrop = false; } + testError = 0; //double weightSum = 0; settings.startTestCycle (); @@ -825,13 +795,14 @@ namespace TMVA size_t patternPerThread = testPattern.size () / numThreads; std::vector batches; auto itPat = testPattern.begin (); - auto itPatEnd = testPattern.end (); + // auto itPatEnd = testPattern.end (); for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread) { batches.push_back (Batch (itPat, itPat + patternPerThread)); itPat += patternPerThread; } - batches.insert (batches.end (), Batch (itPat, itPatEnd)); + if (itPat != testPattern.end ()) + batches.push_back (Batch (itPat, testPattern.end ())); std::vector>>> futures; for (auto& batch : batches) @@ -841,62 +812,68 @@ namespace TMVA std::async 
(std::launch::async, [&]() { std::vector localOutput; - std::tuple passThrough (settings, batch, dropContainerTest); - double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput); + pass_through_type passThrough (settings, batch, dropContainerTest); + double testBatchError = (*this) (passThrough, weightBucket, ModeOutput::FETCH, localOutput); return std::make_tuple (testBatchError, localOutput); }) ); } + auto itBatch = batches.begin (); for (auto& f : futures) { std::tuple> result = f.get (); testError += std::get<0>(result) / batches.size (); std::vector output = std::get<1>(result); - if (output.size () == testPattern.size ()) + + //if (output.size () == testPattern.size ()) { - auto it = begin (testPattern); + //auto it = begin (testPattern); + auto it = (*itBatch).begin (); for (double out : output) { settings.testSample (0, out, (*it).output ().at (0), (*it).weight ()); ++it; } } + ++itBatch; } } else { std::vector output; - for (auto it = begin (testPattern), itEnd = end (testPattern); it != itEnd; ++it) + //for (auto it = begin (testPattern), itEnd = end (testPattern); it != itEnd; ++it) { - const Pattern& p = (*it); - double weight = p.weight (); - Batch batch (it, it+1); + //const Pattern& p = (*it); + //double weight = p.weight (); + //Batch batch (it, it+1); + Batch batch (begin (testPattern), end (testPattern)); output.clear (); - std::tuple passThrough (settings, batch, dropContainerTest); - double testPatternError = (*this) (passThrough, weights, ModeOutput::FETCH, output); - if (output.size () == 1) + pass_through_type passThrough (settings, batch, dropContainerTest); + double testPatternError = (*this) (passThrough, weightBucket, ModeOutput::FETCH, output); + + auto it = batch.begin (); + for (double out : output) { - /* std::vector out = (*this).compute (p.input (), weights); */ - /* assert (output.at (0) == out.at (0)); */ - settings.testSample (testPatternError, output.at (0), p.output ().at (0), weight); + settings.testSample (0, out, (*it).output ().at (0), (*it).weight ()); + ++it; } //weightSum += fabs (weight); //testError += testPatternError*weight; - testError += testPatternError; + testError += testPatternError; /// batch.size (); } - testError /= testPattern.size (); + // testError /= testPattern.size (); } settings.endTestCycle (); // testError /= weightSum; - settings.computeResult (*this, weights); + settings.computeResult (*this, weightBucket); hasConverged = settings.hasConverged (testError); if (!hasConverged && !isWeightsForDrop) { - dropOutWeightFactor (weights, dropFractions, true); // inverse + dropOutWeightFactor (weightBucket, dropFractions, true); // inverse isWeightsForDrop = true; } } @@ -953,13 +930,14 @@ namespace TMVA * \param dropContainer the data for dropping-out nodes (regularization technique) */ template - inline double Net::trainCycle (Minimizer& minimizer, std::vector& weights, + double Net::trainCycle (Minimizer& minimizer, std::vector& weightBucket, std::vector& layerWeightNumber, Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer) { double error = 0.0; size_t numPattern = std::distance (itPatternBegin, itPatternEnd); size_t numBatches = numPattern/settings.batchSize (); size_t numBatches_stored = numBatches; + const int const_m_bucketSize = m_bucketSize; std::random_shuffle (itPatternBegin, itPatternEnd); Iterator itPatternBatchBegin = itPatternBegin; @@ -1013,8 +991,8 @@ namespace TMVA for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; 
++it) { Batch& batch = *it; - std::tuple settingsAndBatch (settings, batch, dropContainer); - localError += minimizer ((*this), weights, settingsAndBatch); /// call the minimizer + pass_through_type settingsAndBatch (settings, batch, dropContainer); + localError += minimizer ((*this), weightBucket, settingsAndBatch, numWeights (), layerWeightNumber, const_m_bucketSize); /// call the minimizer } return localError; }) @@ -1028,8 +1006,8 @@ namespace TMVA { for (auto& batch : batches) { - std::tuple settingsAndBatch (settings, batch, dropContainer); - error += minimizer ((*this), weights, settingsAndBatch); + pass_through_type settingsAndBatch (settings, batch, dropContainer); + error += minimizer ((*this), weightBucket, settingsAndBatch, numWeights (), layerWeightNumber, const_m_bucketSize); } } @@ -1050,15 +1028,17 @@ namespace TMVA * \param weights the weight data */ template - std::vector Net::compute (const std::vector& input, const Weights& weights) const + std::vector Net::compute (const std::vector& input, Weights& weightBucket, int BUCKET_SIZE) const { std::vector layerData; layerData.reserve (m_layers.size ()+1); - auto itWeight = begin (weights); + int itWeight = 0; auto itInputBegin = begin (input); auto itInputEnd = end (input); layerData.push_back (LayerData (itInputBegin, itInputEnd)); size_t numNodesPrev = input.size (); + + // -------------------- prepare layer data with one pattern ------------------------------- for (auto& layer: m_layers) { layerData.push_back (LayerData (layer.numNodes (), itWeight, @@ -1071,331 +1051,414 @@ namespace TMVA // --------- forward ------------- - size_t idxLayer = 0, idxLayerEnd = m_layers.size (); - for (; idxLayer < idxLayerEnd; ++idxLayer) - { - LayerData& prevLayerData = layerData.at (idxLayer); - LayerData& currLayerData = layerData.at (idxLayer+1); - - forward (prevLayerData, currLayerData); - applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ()); - } + forwardPattern (m_layers, layerData, weightBucket, BUCKET_SIZE); // ------------- fetch output ------------------ - if (TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.back ().outputMode ())) - { std::vector output; - output.assign (layerData.back ().valuesBegin (), layerData.back ().valuesEnd ()); - return output; - } - std::vector output (layerData.back ().probabilities ()); + fetchOutput (layerData.back (), output); return output; } - template - double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const + template + double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket) const { std::vector nothing; // empty gradients; no backpropagation is done, just forward - assert (numWeights () == weights.size ()); - double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::begin (nothing), std::end (nothing), 10000, nothing, false); + + double error = forward_backward(m_layers, settingsAndBatch, 0, numWeights () - 1, 0, 0, 10000, nothing, false, weightBucket, nothing); return error; } - template - double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput /*eFetch*/, OutContainer& outputContainer) const + template + double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, ModeOutput /*eFetch*/, OutContainer& outputContainer) const { std::vector nothing; // empty gradients; no backpropagation is done, just forward - assert (numWeights () == weights.size ()); - double error = 
forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::begin (nothing), std::end (nothing), 10000, outputContainer, true); + + double error = forward_backward(m_layers, settingsAndBatch, 0, numWeights () - 1, 0, 0, 10000, outputContainer, true, weightBucket, nothing); return error; } - template - double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, Gradients& gradients) const + template + double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket) const { std::vector nothing; - assert (numWeights () == weights.size ()); - assert (weights.size () == gradients.size ()); - double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::begin (gradients), std::end (gradients), 0, nothing, false); + // std::cout<<"\nnumWeights = "< - double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const + template + double Net::operator() (PassThrough& settingsAndBatch, std::vector& weightBucket, std::vector& gradientBucket, ModeOutput eFetch, OutContainer& outputContainer) const { MATH_UNUSED(eFetch); - assert (numWeights () == weights.size ()); - assert (weights.size () == gradients.size ()); - double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::begin (gradients), std::end (gradients), 0, outputContainer, true); + + assert (weightBucket.size () == gradientBucket.size ()); + double error = forward_backward(m_layers, settingsAndBatch, 0, numWeights () - 1, 0, numWeights () - 1, 0, outputContainer, true, weightBucket, gradientBucket); return error; } + template + std::vector> Net::prepareLayerData (LayerContainer& _layers, + Batch& batch, + const DropContainer& dropContainer, + int itWeightBegin, + int /*itWeightEnd*/, + int itGradientBegin, + int itGradientEnd, + size_t& totalNumWeights) const + { + LayerData::const_dropout_iterator itDropOut; + bool usesDropOut = !dropContainer.empty (); + if (usesDropOut) + itDropOut = std::begin (dropContainer); + + if (_layers.empty ()) + throw std::string ("no layers in this net"); + + + // ----------- create layer data ------------------------------------------------------- + assert (_layers.back ().numNodes () == outputSize ()); + totalNumWeights = 0; + size_t totalNumNodes = 0; + std::vector> layerPatternData; + layerPatternData.reserve (_layers.size ()+1); + int itWeight = itWeightBegin; + int itGradient = itGradientBegin; + size_t numNodesPrev = inputSize (); + typename Pattern::const_iterator itInputBegin; + typename Pattern::const_iterator itInputEnd; -/*! 
\brief forward propagation and backward propagation - * - * - */ - template - double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch, - ItWeight itWeightBegin, - ItGradient itGradientBegin, ItGradient itGradientEnd, - size_t trainFromLayer, - OutContainer& outputContainer, bool fetchOutput) const - { - Settings& settings = std::get<0>(settingsAndBatch); - Batch& batch = std::get<1>(settingsAndBatch); - DropContainer& dropContainer = std::get<2>(settingsAndBatch); - bool doBatchNormalization = settings.doBatchNormalization (); - bool usesDropOut = !dropContainer.empty (); + // ItWeight itGammaBegin = itWeightBegin + numWeights (); + // ItWeight itBetaBegin = itWeightBegin + numWeights () + numNodes (); + // ItGradient itGradGammaBegin = itGradientBegin + numWeights (); + // ItGradient itGradBetaBegin = itGradientBegin + numWeights () + numNodes (); - LayerData::const_dropout_iterator itDropOut; - if (usesDropOut) - itDropOut = std::begin (dropContainer); - if (_layers.empty ()) - { - std::cout << "no layers in this net" << std::endl; - throw std::string ("no layers in this net"); - } + // --------------------- prepare layer data for input layer ---------------------------- + layerPatternData.push_back (std::vector()); + for (const Pattern& _pattern : batch) + { + std::vector& layerData = layerPatternData.back (); + layerData.push_back (LayerData (numNodesPrev)); + itInputBegin = _pattern.beginInput (); + itInputEnd = _pattern.endInput (); + layerData.back ().setInput (itInputBegin, itInputEnd); + + if (usesDropOut) + layerData.back ().setDropOut (itDropOut); - double sumError = 0.0; - double sumWeights = 0.0; // ------------- + } + + + if (usesDropOut) + itDropOut += _layers.back ().numNodes (); - // ----------- create layer data ------------------------------------------------------- - assert (_layers.back ().numNodes () == outputSize ()); - size_t totalNumWeights = 0; - std::vector> layerPatternData; - layerPatternData.reserve (_layers.size ()+1); - ItWeight itWeight = itWeightBegin; - ItGradient itGradient = itGradientBegin; - size_t numNodesPrev = inputSize (); - typename Pattern::const_iterator itInputBegin; - typename Pattern::const_iterator itInputEnd; - - // --------------------- prepare layer data for input layer ---------------------------- + // ---------------- prepare subsequent layers --------------------------------------------- + // for each of the layers + for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer) + { + bool isOutputLayer = (itLayer+1 == itLayerEnd); + bool isFirstHiddenLayer = (itLayer == begin (_layers)); + + auto& layer = *itLayer; layerPatternData.push_back (std::vector()); - layerPatternData.back () . 
reserve(batch.size()); + // for each pattern, prepare a layerData for (const Pattern& _pattern : batch) { std::vector& layerData = layerPatternData.back (); - layerData.push_back (LayerData (numNodesPrev)); + //layerData.push_back (LayerData (numNodesPrev)); + + if (itGradientBegin == itGradientEnd) + { + layerData.push_back (LayerData (layer.numNodes (), itWeight, + layer.activationFunction (), + layer.modeOutputValues ())); + } + else + { + layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient, + layer.activationFunction (), + layer.inverseActivationFunction (), + layer.modeOutputValues ())); + } - itInputBegin = _pattern.beginInput (); - itInputEnd = _pattern.endInput (); - layerData.back ().setInput (itInputBegin, itInputEnd); - if (usesDropOut) { layerData.back ().setDropOut (itDropOut); } + } + if (usesDropOut) { - itDropOut += _layers.back ().numNodes (); + itDropOut += layer.numNodes (); } + size_t _numWeights = layer.numWeights (numNodesPrev); + totalNumWeights += _numWeights; + itWeight += _numWeights; + itGradient += _numWeights; + numNodesPrev = layer.numNodes (); + totalNumNodes += numNodesPrev; - // ---------------- prepare subsequent layers --------------------------------------------- - // for each of the layers - for (auto& layer: _layers) - { - layerPatternData.push_back (std::vector()); - layerPatternData.back () . reserve(batch.size()); - // for each pattern, prepare a layerData - for (const Pattern& _pattern : batch) - { - std::vector& layerData = layerPatternData.back (); - //layerData.push_back (LayerData (numNodesPrev)); + } + assert (totalNumWeights > 0); + return layerPatternData; +} - if (itGradientBegin == itGradientEnd) - layerData.push_back (LayerData (layer.numNodes (), itWeight, - layer.activationFunction (), - layer.modeOutputValues ())); - else - layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient, - layer.activationFunction (), - layer.inverseActivationFunction (), - layer.modeOutputValues ())); - if (usesDropOut) - { - layerData.back ().setDropOut (itDropOut); - } - } - - if (usesDropOut) - { - itDropOut += layer.numNodes (); - } - size_t _numWeights = layer.numWeights (numNodesPrev); - totalNumWeights += _numWeights; - itWeight += _numWeights; - itGradient += _numWeights; - numNodesPrev = layer.numNodes (); - } - assert (totalNumWeights > 0); + template + void Net::forwardPattern (const LayerContainer& _layers, + std::vector& layerData, std::vector& weightBucket, int BUCKET_SIZE) const + { + size_t idxLayer = 0, idxLayerEnd = _layers.size (); + size_t cumulativeNodeCount = 0; + for (; idxLayer < idxLayerEnd; ++idxLayer) + { + LayerData& prevLayerData = layerData.at (idxLayer); + LayerData& currLayerData = layerData.at (idxLayer+1); + + forward (prevLayerData, currLayerData, weightBucket, idxLayer, BUCKET_SIZE); + applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ()); + } + } - // ---------------------------------- loop over layers and pattern ------------------------------------------------------- - for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer) //std::vector& layerPattern : layerPatternData) - { - bool doTraining = idxLayer >= trainFromLayer; - // get layer-pattern data for this and the corresponding one from the next layer - std::vector& prevLayerPatternData = layerPatternData.at (idxLayer); - std::vector& currLayerPatternData = layerPatternData.at (idxLayer+1); - size_t numPattern = 
prevLayerPatternData.size (); - std::vector means (_layers.at (idxLayer).numNodes ()); - // ---------------- loop over layerDatas of pattern compute forward ---------------------------- - for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern) - { - const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern); - LayerData& currLayerData = currLayerPatternData.at (idxPattern); + template + void Net::forwardBatch (const LayerContainer& _layers, + LayerPatternContainer& layerPatternData, + std::vector& valuesMean, + std::vector& valuesStdDev, + size_t trainFromLayer, std::vector& weightBucket) const + { + valuesMean.clear (); + valuesStdDev.clear (); + + // ---------------------------------- loop over layers and pattern ------------------------------------------------------- + size_t cumulativeNodeCount = 0; + for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer) + { + bool doTraining = idxLayer >= trainFromLayer; + + // get layer-pattern data for this and the corresponding one from the next layer + std::vector& prevLayerPatternData = layerPatternData.at (idxLayer); + std::vector& currLayerPatternData = layerPatternData.at (idxLayer+1); + + size_t numPattern = prevLayerPatternData.size (); + size_t numNodesLayer = _layers.at (idxLayer).numNodes (); + + std::vector means (numNodesLayer); + // ---------------- loop over layerDatas of pattern compute forward ---------------------------- + for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern) + { + const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern); + LayerData& currLayerData = currLayerPatternData.at (idxPattern); - if (doTraining) - forward_training (prevLayerData, currLayerData); - else - forward (prevLayerData, currLayerData); + forward (prevLayerData, currLayerData, weightBucket, idxLayer, m_bucketSize); // feed forward + } + + // ---------------- loop over layerDatas of pattern apply non-linearities ---------------------------- + for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern) + { + //const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern); + LayerData& currLayerData = currLayerPatternData.at (idxPattern); + + if (doTraining) + applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (), + currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ()); + else + applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ()); + } + // accumulate node count + cumulativeNodeCount += numNodesLayer; + } +} - // -------- compute batch mean and variance if batch normalization is turned on ------------------ - if (doBatchNormalization && doTraining) - { -// means.at (idxPattern).add (*(prevLayerData.valuesBegin ()+idxPattern)); - } - } - // ---------------- do batch normalization ---------------------------- - if (doBatchNormalization) - { - if (doTraining) // take means and variances from batch - { - for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern) - { - } - } - else // take average mean and variance for batch normalization - { - } - } - - // ---------------- loop over layerDatas of pattern apply non-linearities ---------------------------- - for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern) - { -// const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern); - LayerData& currLayerData = currLayerPatternData.at (idxPattern); - - if (doTraining) - 
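// --- Illustrative sketch, not part of the patch: forwardPattern/forwardBatch above call
// forward (prevLayerData, currLayerData, weightBucket, idxLayer, BUCKET_SIZE) once per layer,
// but the body of forward() is not in this hunk. The snippet below only shows what a dense
// forward step looks like when each logical weight index is resolved through a lookup such as
// the hashed-bucket access sketched earlier; the row-major index layout is an assumption.
#include <cstddef>
#include <functional>
#include <vector>

inline void denseForwardSketch (const std::vector<double>& input,
                                std::vector<double>& preActivations,  // one entry per output node
                                int firstLogicalIndex,                // first weight index of this layer
                                const std::function<double (int)>& weightAt)
{
   for (std::size_t iOut = 0; iOut < preActivations.size (); ++iOut)
   {
      double sum = 0.0;
      for (std::size_t iIn = 0; iIn < input.size (); ++iIn)
         sum += input[iIn] * weightAt (firstLogicalIndex
                                       + static_cast<int> (iOut * input.size () + iIn));
      preActivations[iOut] = sum;
   }
}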
applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (), - currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ()); - else - applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ()); - } - } + template + void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const + { + ModeOutputValues eModeOutput = lastLayerData.outputMode (); + if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput)) + { + outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ()); + } + else if (isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) || + isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput)) + { + const auto& prob = lastLayerData.probabilities (); + outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ()) ; + } + else + assert (false); + } + + + + + template + void Net::fetchOutput (const std::vector& lastLayerPatternData, OutputContainer& outputContainer) const + { + for (const LayerData& lastLayerData : lastLayerPatternData) + fetchOutput (lastLayerData, outputContainer); + } + template + std::tuple Net::computeError (const Settings& settings, + std::vector& lastLayerData, + Batch& batch, Weights& weightBucket) const + { + typename std::vector::iterator itLayerData = lastLayerData.begin (); + + typename std::vector::iterator itLayerDataNext = itLayerData; + ++itLayerDataNext; + + typename std::vector::iterator itLayerDataEnd = lastLayerData.end (); + + typename std::vector::const_iterator itPattern = batch.begin (); + typename std::vector::const_iterator itPatternEnd = batch.end (); + + double sumWeights (0.0); + double sumError (0.0); - // ------------- fetch output ------------------ - if (fetchOutput) + size_t idxPattern = 0; + for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData, ++itLayerDataNext, ++idxPattern) + { + + // compute E and the deltas of the computed output and the true output + LayerData& layerData = (*itLayerData); + LayerData& nextLayerData = (*itLayerDataNext); + const Pattern& _pattern = (*itPattern); + double error = errorFunction (layerData, nextLayerData, _pattern.output (), + _pattern.weight (), weightBucket, settings.factorWeightDecay (), + settings.regularization (), idxPattern); + sumWeights += fabs (_pattern.weight ()); + sumError += error; + } + return std::make_tuple (sumError, sumWeights); + } + + + + template + void Net::backPropagate (std::vector>& layerPatternData, std::vector& weightBucket, std::vector& gradientBucket, + const Settings& settings, + size_t trainFromLayer, + size_t totalNumWeights) const + { + bool doTraining = layerPatternData.size () > trainFromLayer; + if (doTraining) // training + { + // ------------- backpropagation ------------- + size_t idxLayer = layerPatternData.size (); + for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend (); + itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData) { - for (LayerData& lastLayerData : layerPatternData.back ()) + --idxLayer; + if (idxLayer <= trainFromLayer) // no training + break; + + std::vector& currLayerDataColl = *(itLayerPatternData); + std::vector& prevLayerDataColl = *(itLayerPatternData+1); + + size_t idxPattern = 0; + for (typename std::vector::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl), + itPrevLayerData = begin (prevLayerDataColl), 
itPrevLayerDataEnd = end (prevLayerDataColl); + itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern) { - ModeOutputValues eModeOutput = lastLayerData.outputMode (); - if (TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, eModeOutput)) - { - outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ()); - } - else if (TMVA::DNN::isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) || - TMVA::DNN::isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput)) - { - const auto& probs = lastLayerData.probabilities (); - outputContainer.insert (outputContainer.end (), probs.begin (), probs.end ()); - } - else - assert (false); + LayerData& currLayerData = (*itCurrLayerData); + LayerData& prevLayerData = *(itPrevLayerData); + + backward (prevLayerData, currLayerData, weightBucket, idxLayer-1, m_bucketSize); + + // the factorWeightDecay has to be scaled by 1/n where n is the number of weights (synapses) + // because L1 and L2 regularization + // + // http://neuralnetworksanddeeplearning.com/chap3.html#overfitting_and_regularization + // + // L1 : -factorWeightDecay*sgn(w)/numWeights + // L2 : -factorWeightDecay/numWeights + update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization (), weightBucket, gradientBucket, idxLayer-1, m_bucketSize); } } + } + } - // ------------- error computation ------------- - std::vector& lastLayerData = layerPatternData.back (); - bool doTraining = layerPatternData.size () > trainFromLayer; +/*! \brief forward propagation and backward propagation + * + * + */ + template + double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch, + int itWeightBegin, int itWeightEnd, + int itGradientBegin, int itGradientEnd, + size_t trainFromLayer, + OutContainer& outputContainer, bool doFetchOutput, std::vector& weightBucket, std::vector& gradientBucket) const + { + Settings& settings = std::get<0>(settingsAndBatch); + Batch& batch = std::get<1>(settingsAndBatch); + DropContainer& dropContainer = std::get<2>(settingsAndBatch); + + double sumError = 0.0; + double sumWeights = 0.0; // ------------- - typename std::vector::iterator itLayerData = lastLayerData.begin (); - typename std::vector::iterator itLayerDataEnd = lastLayerData.end (); - typename std::vector::const_iterator itPattern = batch.begin (); - typename std::vector::const_iterator itPatternEnd = batch.end (); + // ----------------------------- prepare layer data ------------------------------------- + size_t totalNumWeights (0); + std::vector> layerPatternData = prepareLayerData (_layers, + batch, + dropContainer, + itWeightBegin, + itWeightEnd, + itGradientBegin, + itGradientEnd, + totalNumWeights); - size_t idxPattern = 0; - for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData) - { - ++idxPattern; - - // compute E and the deltas of the computed output and the true output - LayerData& layerData = (*itLayerData); - const Pattern& _pattern = (*itPattern); - itWeight = itWeightBegin; - double error = errorFunction (layerData, _pattern.output (), - itWeight, itWeight + totalNumWeights, - _pattern.weight (), settings.factorWeightDecay (), - settings.regularization ()); - sumWeights += fabs (_pattern.weight ()); - sumError += error; - } - if (doTraining) // training + + // ---------------------------------- propagate forward ------------------------------------------------------------------ + std::vector valuesMean; + std::vector valuesStdDev; + forwardBatch (_layers, 
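// --- Illustrative sketch, not part of the patch: the comment in backPropagate above explains
// that factorWeightDecay is scaled by 1/totalNumWeights before reaching update(). The generic
// error-side form of that regularisation term is
//   L1 :  E -> E + (lambda/n) * sum_i |w_i|
//   L2 :  E -> E + (lambda/n) * 0.5 * sum_i w_i^2
// The exact convention used by weightDecay() is not visible in this hunk, so the helper below
// is an assumed illustration only.
#include <cmath>
#include <vector>

inline double addRegularisationSketch (double error, const std::vector<double>& weights,
                                       double lambdaOverN, bool useL1)
{
   double penalty = 0.0;
   for (double w : weights)
      penalty += useL1 ? std::fabs (w) : 0.5 * w * w;
   return error + lambdaOverN * penalty;
}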
layerPatternData, valuesMean, valuesStdDev, trainFromLayer, weightBucket); + + + // ------------- fetch output ------------------ + if (doFetchOutput) { + fetchOutput (layerPatternData.back (), outputContainer); + } + + + // ------------- error computation ------------- + std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, weightBucket); + + // ------------- backpropagation ------------- - size_t idxLayer = layerPatternData.size (); - for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend (); - itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData) - { - --idxLayer; - if (idxLayer <= trainFromLayer) // no training - break; + backPropagate (layerPatternData, weightBucket, gradientBucket, settings, trainFromLayer, totalNumWeights); - std::vector& currLayerDataColl = *(itLayerPatternData); - std::vector& prevLayerDataColl = *(itLayerPatternData+1); - idxPattern = 0; - for (typename std::vector::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl), - itPrevLayerData = begin (prevLayerDataColl), itPrevLayerDataEnd = end (prevLayerDataColl); - itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern) - { - LayerData& currLayerData = (*itCurrLayerData); - LayerData& prevLayerData = *(itPrevLayerData); - - backward (prevLayerData, currLayerData); - - // the factorWeightDecay has to be scaled by 1/n where n is the number of weights (synapses) - // because L1 and L2 regularization - // - // http://neuralnetworksanddeeplearning.com/chap3.html#overfitting_and_regularization - // - // L1 : -factorWeightDecay*sgn(w)/numWeights - // L2 : -factorWeightDecay/numWeights - update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ()); - } - } - } - + // --- compile the measures double batchSize = std::distance (std::begin (batch), std::end (batch)); - for (auto it = itGradientBegin; it != itGradientEnd; ++it) + for (auto it = gradientBucket.begin(); it != gradientBucket.end(); ++it) (*it) /= batchSize; @@ -1410,7 +1473,7 @@ namespace TMVA * */ template - void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight) + void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight, std::vector& layerWeightNumber, int BUCKET_SIZE) { if (eInitStrategy == WeightInitializationStrategy::XAVIER) { @@ -1419,18 +1482,20 @@ namespace TMVA // compute variance and mean of input and output //... 
- + // compute the weights for (auto& layer: layers ()) { double nIn = numInput; double stdDev = sqrt (2.0/nIn); - for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight) + // for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) + for (size_t iWeight = 0, iWeightEnd = (BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) { (*itWeight) = DNN::gaussDouble (0.0, stdDev); // factor 2.0 for ReLU ++itWeight; } + layerWeightNumber.push_back((int)layer.numWeights (numInput)); numInput = layer.numNodes (); } return; @@ -1451,12 +1516,14 @@ namespace TMVA double nIn = numInput; double minVal = -sqrt(2.0/nIn); double maxVal = sqrt (2.0/nIn); - for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight) + // for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) + for (size_t iWeight = 0, iWeightEnd = (BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) { (*itWeight) = DNN::uniformDouble (minVal, maxVal); // factor 2.0 for ReLU ++itWeight; } + layerWeightNumber.push_back((int)layer.numWeights (numInput)); numInput = layer.numNodes (); } return; @@ -1475,11 +1542,13 @@ namespace TMVA for (auto& layer: layers ()) { // double nIn = numInput; - for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight) + // for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) + for (size_t iWeight = 0, iWeightEnd = (BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) { (*itWeight) = DNN::gaussDouble (0.0, 0.1); ++itWeight; } + layerWeightNumber.push_back((int)layer.numWeights (numInput)); numInput = layer.numNodes (); } return; @@ -1498,11 +1567,13 @@ namespace TMVA for (auto& layer: layers ()) { double nIn = numInput; - for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight) + // for (size_t iWeight = 0, iWeightEnd = (layer.numWeights (numInput) / BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) + for (size_t iWeight = 0, iWeightEnd = (BUCKET_SIZE); iWeight < iWeightEnd; ++iWeight) { (*itWeight) = DNN::gaussDouble (0.0, sqrt (layer.numWeights (nIn))); // factor 2.0 for ReLU ++itWeight; } + layerWeightNumber.push_back((int)layer.numWeights (numInput)); numInput = layer.numNodes (); } return; @@ -1518,14 +1589,14 @@ namespace TMVA * * */ - template + template double Net::errorFunction (LayerData& layerData, + LayerData& nextLayerData, Container truth, - ItWeight itWeight, - ItWeight itWeightEnd, double patternWeight, + std::vector& weightBucket, double factorWeightDecay, - EnumRegularization eRegularization) const + EnumRegularization eRegularization, size_t layerNumber) const { double error (0); switch (m_eErrorFunction) @@ -1563,7 +1634,7 @@ namespace TMVA } if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE) { - error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization); + error = weightDecay (error, layerData.weightsBegin (), nextLayerData.weightsBegin (), weightBucket, factorWeightDecay, eRegularization, layerNumber, m_bucketSize); } return error; } @@ -1574,118 +1645,115 @@ namespace TMVA - -/*! 
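// --- Illustrative sketch, not part of the patch: with bucketed weights, initializeWeights
// above draws only BUCKET_SIZE values per layer instead of one value per synapse, while
// layerWeightNumber records each layer's full (logical) weight count. The standalone snippet
// below mirrors only the XAVIER branch (w ~ N(0, sqrt(2/nIn)), factor 2.0 for ReLU); names
// such as nodesPerLayer and bucketSizePerLayer are illustrative.
#include <cmath>
#include <cstddef>
#include <random>
#include <vector>

inline std::vector<double> xavierBucketSketch (const std::vector<std::size_t>& nodesPerLayer,
                                               std::size_t numInput, int bucketSizePerLayer)
{
   std::vector<double> bucket;
   std::mt19937 generator (0);
   std::size_t nIn = numInput;
   for (std::size_t numNodes : nodesPerLayer)
   {
      std::normal_distribution<double> gauss (0.0, std::sqrt (2.0 / nIn)); // factor 2.0 for ReLU
      for (int i = 0; i < bucketSizePerLayer; ++i)
         bucket.push_back (gauss (generator));
      nIn = numNodes; // fan-in of the next layer
   }
   return bucket;
}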
\brief pre-training - * - * in development - */ - template - void Net::preTrain (std::vector& weights, - std::vector& trainPattern, - const std::vector& testPattern, - Minimizer& minimizer, Settings& settings) - { - auto itWeightGeneral = std::begin (weights); - std::vector prePatternTrain (trainPattern.size ()); - std::vector prePatternTest (testPattern.size ()); - - size_t _inputSize = inputSize (); - - // transform pattern using the created preNet - auto initializePrePattern = [&](const std::vector& pttrnInput, std::vector& pttrnOutput) - { - pttrnOutput.clear (); - std::transform (std::begin (pttrnInput), std::end (pttrnInput), - std::back_inserter (pttrnOutput), - [](const Pattern& p) - { - Pattern pat (p.input (), p.input (), p.weight ()); - return pat; - }); - }; - - initializePrePattern (trainPattern, prePatternTrain); - initializePrePattern (testPattern, prePatternTest); - - std::vector originalDropFractions = settings.dropFractions (); - - for (auto& _layer : layers ()) - { - // compute number of weights (as a function of the number of incoming nodes) - // fetch number of nodes - size_t numNodes = _layer.numNodes (); - size_t _numWeights = _layer.numWeights (_inputSize); - - // ------------------ - DNN::Net preNet; - if (!originalDropFractions.empty ()) - { - originalDropFractions.erase (originalDropFractions.begin ()); - settings.setDropOut (originalDropFractions.begin (), originalDropFractions.end (), settings.dropRepetitions ()); - } - std::vector preWeights; - - // define the preNet (pretraining-net) for this layer - // outputSize == inputSize, because this is an autoencoder; - preNet.setInputSize (_inputSize); - preNet.addLayer (DNN::Layer (numNodes, _layer.activationFunctionType ())); - preNet.addLayer (DNN::Layer (_inputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::DIRECT)); - preNet.setErrorFunction (DNN::ModeErrorFunction::SUMOFSQUARES); - preNet.setOutputSize (_inputSize); // outputSize is the inputSize (autoencoder) - - // initialize weights - preNet.initializeWeights (DNN::WeightInitializationStrategy::XAVIERUNIFORM, - std::back_inserter (preWeights)); - - // overwrite already existing weights from the "general" weights - std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()); - std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()+_numWeights); // set identical weights for the temporary output layer +// /*! 
\brief pre-training +// * +// * in development +// */ +// template +// void Net::preTrain (std::vector& weights, +// std::vector& trainPattern, +// const std::vector& testPattern, +// Minimizer& minimizer, Settings& settings) +// { +// auto itWeightGeneral = std::begin (weights); +// std::vector prePatternTrain (trainPattern.size ()); +// std::vector prePatternTest (testPattern.size ()); + +// size_t _inputSize = inputSize (); + +// // transform pattern using the created preNet +// auto initializePrePattern = [&](const std::vector& pttrnInput, std::vector& pttrnOutput) +// { +// pttrnOutput.clear (); +// std::transform (std::begin (pttrnInput), std::end (pttrnInput), +// std::back_inserter (pttrnOutput), +// [](const Pattern& p) +// { +// Pattern pat (p.input (), p.input (), p.weight ()); +// return pat; +// }); +// }; + +// initializePrePattern (trainPattern, prePatternTrain); +// initializePrePattern (testPattern, prePatternTest); + +// std::vector originalDropFractions = settings.dropFractions (); + +// for (auto& _layer : layers ()) +// { +// // compute number of weights (as a function of the number of incoming nodes) +// // fetch number of nodes +// size_t numNodes = _layer.numNodes (); +// size_t _numWeights = _layer.numWeights (_inputSize); + +// // ------------------ +// DNN::Net preNet; +// if (!originalDropFractions.empty ()) +// { +// originalDropFractions.erase (originalDropFractions.begin ()); +// settings.setDropOut (originalDropFractions.begin (), originalDropFractions.end (), settings.dropRepetitions ()); +// } +// std::vector preWeights; + +// // define the preNet (pretraining-net) for this layer +// // outputSize == inputSize, because this is an autoencoder; +// preNet.setInputSize (_inputSize); +// preNet.addLayer (DNN::Layer (numNodes, _layer.activationFunctionType ())); +// preNet.addLayer (DNN::Layer (_inputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::DIRECT)); +// preNet.setErrorFunction (DNN::ModeErrorFunction::SUMOFSQUARES); +// preNet.setOutputSize (_inputSize); // outputSize is the inputSize (autoencoder) + +// // initialize weights +// preNet.initializeWeights (DNN::WeightInitializationStrategy::XAVIERUNIFORM, +// std::back_inserter (preWeights)); + +// // overwrite already existing weights from the "general" weights +// std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()); +// std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()+_numWeights); // set identical weights for the temporary output layer - // train the "preNet" - preNet.train (preWeights, prePatternTrain, prePatternTest, minimizer, settings); +// // train the "preNet" +// preNet.train (preWeights, prePatternTrain, prePatternTest, minimizer, settings); - // fetch the pre-trained weights (without the output part of the autoencoder) - std::copy (std::begin (preWeights), std::begin (preWeights) + _numWeights, itWeightGeneral); +// // fetch the pre-trained weights (without the output part of the autoencoder) +// std::copy (std::begin (preWeights), std::begin (preWeights) + _numWeights, itWeightGeneral); - // advance the iterator on the incoming weights - itWeightGeneral += _numWeights; +// // advance the iterator on the incoming weights +// itWeightGeneral += _numWeights; - // remove the weights of the output layer of the preNet - preWeights.erase (preWeights.begin () + _numWeights, preWeights.end ()); +// // remove the weights of the output layer of the preNet +// preWeights.erase (preWeights.begin () + _numWeights, preWeights.end ()); - // 
remove the outputLayer of the preNet - preNet.removeLayer (); +// // remove the outputLayer of the preNet +// preNet.removeLayer (); - // set the output size to the number of nodes in the new output layer (== last hidden layer) - preNet.setOutputSize (numNodes); +// // set the output size to the number of nodes in the new output layer (== last hidden layer) +// preNet.setOutputSize (numNodes); - // transform pattern using the created preNet - auto proceedPattern = [&](std::vector& pttrn) - { - std::vector newPttrn; - std::for_each (std::begin (pttrn), std::end (pttrn), - [&preNet,&preWeights,&newPttrn](Pattern& p) - { - std::vector output = preNet.compute (p.input (), preWeights); - Pattern pat (output, output, p.weight ()); - newPttrn.push_back (pat); -// p = pat; - }); - return newPttrn; - }; - - - prePatternTrain = proceedPattern (prePatternTrain); - prePatternTest = proceedPattern (prePatternTest); - - - // the new input size is the output size of the already reduced preNet - _inputSize = preNet.layers ().back ().numNodes (); - } - } - - +// // transform pattern using the created preNet +// auto proceedPattern = [&](std::vector& pttrn) +// { +// std::vector newPttrn; +// std::for_each (std::begin (pttrn), std::end (pttrn), +// [&preNet,&preWeights,&newPttrn](Pattern& p) +// { +// std::vector output = preNet.compute (p.input (), preWeights); +// Pattern pat (output, output, p.weight ()); +// newPttrn.push_back (pat); +// // p = pat; +// }); +// return newPttrn; +// }; + + +// prePatternTrain = proceedPattern (prePatternTrain); +// prePatternTest = proceedPattern (prePatternTest); + + +// // the new input size is the output size of the already reduced preNet +// _inputSize = preNet.layers ().back ().numNodes (); +// } +// } diff --git a/tmva/tmva/src/MethodDNN.cxx b/tmva/tmva/src/MethodDNN.cxx index c16044730c6cc..838ab10bc78f4 100644 --- a/tmva/tmva/src/MethodDNN.cxx +++ b/tmva/tmva/src/MethodDNN.cxx @@ -1,5 +1,5 @@ // @(#)root/tmva $Id$ -// Author: Peter Speckmayer +// Authors: Peter Speckmayer, Aditya Sharma /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * @@ -10,7 +10,8 @@ * Description: * * A neural network implementation * * * - * Authors (alphabetical): * + * Authors (alphabetical): + * Aditya Sharma - CERN, Switzerland * Peter Speckmayer - CERN, Switzerland * * * * Copyright (c) 2005-2015: * @@ -79,9 +80,8 @@ ClassImp(TMVA::MethodDNN) TMVA::MethodDNN::MethodDNN( const TString& jobName, const TString& methodTitle, DataSetInfo& theData, - const TString& theOption, - TDirectory* theTargetDir ) - : MethodBase( jobName, Types::kDNN, methodTitle, theData, theOption, theTargetDir ) + const TString& theOption ) + : MethodBase( jobName, Types::kDNN, methodTitle, theData, theOption) , fResume (false) { // standard constructor @@ -89,9 +89,8 @@ TMVA::MethodDNN::MethodDNN( const TString& jobName, //______________________________________________________________________________ TMVA::MethodDNN::MethodDNN( DataSetInfo& theData, - const TString& theWeightFile, - TDirectory* theTargetDir ) - : MethodBase( Types::kDNN, theData, theWeightFile, theTargetDir ) + const TString& theWeightFile) + : MethodBase( Types::kDNN, theData, theWeightFile) , fResume (false) { // constructor from a weight file @@ -429,6 +428,8 @@ void TMVA::MethodDNN::ProcessOptions() std::vector dropConfig; dropConfig = fetchValue (block, "DropConfig", dropConfig); int dropRepetitions = fetchValue (block, 
"DropRepetitions", 3); + int bucketSize = fetchValue (block, "BucketSize", 8); + fBucketSize = bucketSize; TMVA::DNN::EnumRegularization eRegularization = TMVA::DNN::EnumRegularization::NONE; if (regularization == "L1") @@ -452,7 +453,7 @@ void TMVA::MethodDNN::ProcessOptions() std::shared_ptr ptrSettings = make_shared ( GetName (), convergenceSteps, batchSize, - testRepetitions, factorWeightDecay, + testRepetitions, factorWeightDecay, bucketSize, eRegularization, fScaleToNumEvents, TMVA::DNN::MinimizerType::fSteepest, learningRate, momentum, repetitions, multithreading); @@ -464,7 +465,7 @@ void TMVA::MethodDNN::ProcessOptions() std::shared_ptr ptrSettings = make_shared ( GetName (), convergenceSteps, batchSize, - testRepetitions, factorWeightDecay, + testRepetitions, factorWeightDecay, bucketSize, eRegularization, TMVA::DNN::MinimizerType::fSteepest, learningRate, momentum, repetitions, multithreading); @@ -475,7 +476,7 @@ void TMVA::MethodDNN::ProcessOptions() std::shared_ptr ptrSettings = make_shared ( GetName (), convergenceSteps, batchSize, - testRepetitions, factorWeightDecay, + testRepetitions, factorWeightDecay, bucketSize, eRegularization, TMVA::DNN::MinimizerType::fSteepest, learningRate, momentum, repetitions, multithreading); @@ -549,15 +550,17 @@ void TMVA::MethodDNN::Train() if (trainPattern.empty () || testPattern.empty ()) return; - // create net and weights + // create net and weight bucket fNet.clear (); - fWeights.clear (); + fWeightBucket.clear (); + + std::vector layerWeightNumber; // if "resume" from saved weights if (fResume) { std::cout << ".. resume" << std::endl; - // std::tie (fNet, fWeights) = ReadWeights (fFileName); + // std::tie (fNet, fWeightBucket) = ReadWeights (fFileName); } else // initialize weights and net { @@ -601,7 +604,7 @@ void TMVA::MethodDNN::Train() // initialize weights fNet.initializeWeights (fWeightInitializationStrategy, - std::back_inserter (fWeights)); + std::back_inserter (fWeightBucket), layerWeightNumber, fBucketSize); } @@ -637,7 +640,7 @@ void TMVA::MethodDNN::Train() if (ptrSettings->minimizerType () == TMVA::DNN::MinimizerType::fSteepest) { DNN::Steepest minimizer (ptrSettings->learningRate (), ptrSettings->momentum (), ptrSettings->repetitions ()); - /*E =*/fNet.train (fWeights, trainPattern, testPattern, minimizer, *ptrSettings.get ()); + /*E =*/fNet.train (fWeightBucket, layerWeightNumber, trainPattern, testPattern, minimizer, *ptrSettings.get ()); } ptrSettings.reset (); Log () << kINFO << Endl; @@ -652,13 +655,13 @@ void TMVA::MethodDNN::Train() //_______________________________________________________________________ Double_t TMVA::MethodDNN::GetMvaValue( Double_t* /*errLower*/, Double_t* /*errUpper*/ ) { - if (fWeights.empty ()) + if (fWeightBucket.empty ()) return 0.0; const std::vector& inputValues = GetEvent ()->GetValues (); std::vector input (inputValues.begin (), inputValues.end ()); input.push_back (1.0); // bias node - std::vector output = fNet.compute (input, fWeights); + std::vector output = fNet.compute (input, fWeightBucket, fBucketSize); if (output.empty ()) return 0.0; @@ -670,8 +673,8 @@ Double_t TMVA::MethodDNN::GetMvaValue( Double_t* /*errLower*/, Double_t* /*errUp const std::vector &TMVA::MethodDNN::GetRegressionValues() { - assert (!fWeights.empty ()); - if (fWeights.empty ()) + assert (!fWeightBucket.empty ()); + if (fWeightBucket.empty ()) return *fRegressionReturnVal; const Event * ev = GetEvent(); @@ -679,7 +682,7 @@ const std::vector &TMVA::MethodDNN::GetRegressionValues() const std::vector& 
inputValues = ev->GetValues (); std::vector input (inputValues.begin (), inputValues.end ()); input.push_back (1.0); // bias node - std::vector output = fNet.compute (input, fWeights); + std::vector output = fNet.compute (input, fWeightBucket, fBucketSize); if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector(); fRegressionReturnVal->clear(); @@ -717,13 +720,13 @@ const std::vector &TMVA::MethodDNN::GetRegressionValues() const std::vector &TMVA::MethodDNN::GetMulticlassValues() { - if (fWeights.empty ()) + if (fWeightBucket.empty ()) return *fRegressionReturnVal; const std::vector& inputValues = GetEvent ()->GetValues (); std::vector input (inputValues.begin (), inputValues.end ()); input.push_back (1.0); // bias node - std::vector output = fNet.compute (input, fWeights); + std::vector output = fNet.compute (input, fWeightBucket, fBucketSize); // check the output of the network @@ -787,10 +790,10 @@ void TMVA::MethodDNN::AddWeightsXMLTo( void* parent ) const void* weightsxml = gTools().xmlengine().NewChild(nn, 0, "Synapses"); gTools().xmlengine().NewAttr (weightsxml, 0, "InputSize", gTools().StringFromInt((int)fNet.inputSize ())); gTools().xmlengine().NewAttr (weightsxml, 0, "OutputSize", gTools().StringFromInt((int)fNet.outputSize ())); - gTools().xmlengine().NewAttr (weightsxml, 0, "NumberSynapses", gTools().StringFromInt((int)fWeights.size ())); + gTools().xmlengine().NewAttr (weightsxml, 0, "NumberSynapses", gTools().StringFromInt((int)fNet.numWeights ())); std::stringstream s(""); s.precision( 16 ); - for (std::vector::const_iterator it = fWeights.begin (), itEnd = fWeights.end (); it != itEnd; ++it) + for (std::vector::const_iterator it = fWeightBucket.begin (), itEnd = fWeightBucket.end (); it != itEnd; ++it) { s << std::scientific << (*it) << " "; } @@ -857,11 +860,11 @@ void TMVA::MethodDNN::ReadWeightsFromXML( void* wghtnode ) const char* content = gTools().GetContent (xmlWeights); std::stringstream sstr (content); - for (Int_t iWeight = 0; iWeight> weight; - fWeights.push_back (weight); + fWeightBucket.push_back (weight); } } @@ -1099,9 +1102,9 @@ void TMVA::MethodDNN::checkGradients () fNet.addLayer (DNN::Layer (outputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::SIGMOID)); fNet.setErrorFunction (DNN::ModeErrorFunction::CROSSENTROPY); // net.setErrorFunction (ModeErrorFunction::SUMOFSQUARES); - + const int BUCKET_SIZE = 8; size_t numWeights = fNet.numWeights (inputSize); - std::vector weights (numWeights); + std::vector weightBucket (numWeights / BUCKET_SIZE); //weights.at (0) = 1000213.2; std::vector pattern; @@ -1121,7 +1124,7 @@ void TMVA::MethodDNN::checkGradients () } - DNN::Settings settings (TString ("checkGradients"), /*_convergenceSteps*/ 15, /*_batchSize*/ 1, /*_testRepetitions*/ 7, /*_factorWeightDecay*/ 0, /*regularization*/ TMVA::DNN::EnumRegularization::NONE); + DNN::Settings settings (TString ("checkGradients"), /*_convergenceSteps*/ 15, /*_batchSize*/ 1, /*_testRepetitions*/ 7, /*_factorWeightDecay*/ 0, /*_bucketSize*/ 8, /*regularization*/ TMVA::DNN::EnumRegularization::NONE); size_t improvements = 0; size_t worsenings = 0; @@ -1129,26 +1132,26 @@ void TMVA::MethodDNN::checkGradients () size_t largeDifferences = 0; for (size_t iTest = 0; iTest < 1000; ++iTest) { - TMVA::DNN::uniformDouble (weights, 0.7); - std::vector gradients (numWeights, 0); + TMVA::DNN::uniformDouble (weightBucket, 0.7); + std::vector gradientBucket (numWeights / BUCKET_SIZE, 0); DNN::Batch batch (begin (pattern), end (pattern)); DNN::DropContainer 
dropContainer; std::tuple settingsAndBatch (settings, batch, dropContainer); - double E = fNet (settingsAndBatch, weights, gradients); - std::vector changedWeights; - changedWeights.assign (weights.begin (), weights.end ()); + double E = fNet (settingsAndBatch, weightBucket, gradientBucket); + std::vector changedWeightBucket; + changedWeightBucket.assign (weightBucket.begin (), weightBucket.end ()); - int changeWeightPosition = TMVA::DNN::randomInt (numWeights); - double dEdw = gradients.at (changeWeightPosition); + int changeWeightPosition = TMVA::DNN::randomInt (numWeights / BUCKET_SIZE); + double dEdw = gradientBucket.at (changeWeightPosition); while (dEdw == 0.0) { - changeWeightPosition = TMVA::DNN::randomInt (numWeights); - dEdw = gradients.at (changeWeightPosition); + changeWeightPosition = TMVA::DNN::randomInt (numWeights / BUCKET_SIZE); + dEdw = gradientBucket.at (changeWeightPosition); } const double gamma = 0.01; double delta = gamma*dEdw; - changedWeights.at (changeWeightPosition) += delta; + changedWeightBucket.at (changeWeightPosition) += delta; if (dEdw == 0.0) { std::cout << "dEdw == 0.0 "; @@ -1156,7 +1159,7 @@ void TMVA::MethodDNN::checkGradients () } assert (dEdw != 0.0); - double Echanged = fNet (settingsAndBatch, changedWeights); + double Echanged = fNet (settingsAndBatch, changedWeightBucket); // double difference = fabs((E-Echanged) - delta*dEdw); double difference = fabs ((E+delta - Echanged)/E); @@ -1185,7 +1188,7 @@ void TMVA::MethodDNN::checkGradients () } else { - // for_each (begin (weights), end (weights), [](double w){ std::cout << w << ", "; }); + // for_each (begin (weightBucket), end (weightBucket), [](double w){ std::cout << w << ", "; }); // std::cout << std::endl; // assert (isOk); } diff --git a/tmva/tmva/src/NeuralNet.cxx b/tmva/tmva/src/NeuralNet.cxx index a8394f649ecc1..b8670cf1e1ccf 100644 --- a/tmva/tmva/src/NeuralNet.cxx +++ b/tmva/tmva/src/NeuralNet.cxx @@ -1,197 +1,213 @@ #include "TMVA/NeuralNet.h" - +#include namespace TMVA { - namespace DNN - { + namespace DNN + { + + int hasherFunction(int a) + { + a = (a+0x7ed55d16) + (a<<12); + a = (a^0xc761c23c) ^ (a>>19); + a = (a+0x165667b1) + (a<<5); + a = (a+0xd3a2646c) ^ (a<<9); + a = (a+0xfd7046c5) + (a<<3); + a = (a^0xb55a4f09) ^ (a>>16); + return std::abs(a); + } - double gaussDouble (double mean, double sigma) - { - static std::default_random_engine generator; - std::normal_distribution distribution (mean, sigma); - return distribution (generator); - } + double gaussDouble (double mean, double sigma) + { + static std::default_random_engine generator; + std::normal_distribution distribution (mean, sigma); + return distribution (generator); + } - double uniformDouble (double minValue, double maxValue) - { - static std::default_random_engine generator; - std::uniform_real_distribution distribution(minValue, maxValue); - return distribution(generator); - } + double uniformDouble (double minValue, double maxValue) + { + static std::default_random_engine generator; + std::uniform_real_distribution distribution(minValue, maxValue); + return distribution(generator); + } - int randomInt (int maxValue) - { - static std::default_random_engine generator; - std::uniform_int_distribution distribution(0,maxValue-1); - return distribution(generator); - } - - - double studenttDouble (double distributionParameter) - { - static std::default_random_engine generator; - std::student_t_distribution distribution (distributionParameter); - return distribution (generator); - } - - - LayerData::LayerData (size_t 
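// --- Illustrative sketch, not part of the patch: MethodDNN::checkGradients above perturbs a
// single bucket entry by delta = gamma * dE/dw and compares the measured change in E against
// what the backpropagated derivative predicts (first-order Taylor: E(w+d) ~ E(w) + d * dE/dw).
// A generic central-difference estimate that could be compared against a gradientBucket entry
// is sketched below; lossFn and eps are illustrative names.
#include <cstddef>
#include <functional>
#include <vector>

inline double numericalGradientSketch (const std::function<double (const std::vector<double>&)>& lossFn,
                                       std::vector<double> weights, std::size_t index,
                                       double eps = 1e-6)
{
   weights.at (index) += eps;
   const double up = lossFn (weights);    // E(w + eps)
   weights.at (index) -= 2.0 * eps;
   const double down = lossFn (weights);  // E(w - eps)
   return (up - down) / (2.0 * eps);      // central difference
}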
inputSize) - : m_isInputLayer (true) - , m_hasWeights (false) - , m_hasGradients (false) - , m_eModeOutput (ModeOutputValues::DIRECT) - { - m_size = inputSize; - m_deltas.assign (m_size, 0); - } - - - - LayerData::LayerData (const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput) - : m_isInputLayer (true) - , m_hasWeights (false) - , m_hasGradients (false) - , m_eModeOutput (eModeOutput) - { - m_itInputBegin = itInputBegin; - m_itInputEnd = itInputEnd; - m_size = std::distance (itInputBegin, itInputEnd); - m_deltas.assign (m_size, 0); - } - - - - - LayerData::LayerData (size_t _size, - const_iterator_type itWeightBegin, - iterator_type itGradientBegin, - std::shared_ptr> _activationFunction, - std::shared_ptr> _inverseActivationFunction, - ModeOutputValues eModeOutput) - : m_size (_size) - , m_itConstWeightBegin (itWeightBegin) - , m_itGradientBegin (itGradientBegin) - , m_activationFunction (_activationFunction) - , m_inverseActivationFunction (_inverseActivationFunction) - , m_isInputLayer (false) - , m_hasWeights (true) - , m_hasGradients (true) - , m_eModeOutput (eModeOutput) - { - m_values.assign (_size, 0); - m_deltas.assign (_size, 0); - m_valueGradients.assign (_size, 0); - } - - - - - LayerData::LayerData (size_t _size, const_iterator_type itWeightBegin, - std::shared_ptr> _activationFunction, - ModeOutputValues eModeOutput) - : m_size (_size) - , m_itConstWeightBegin (itWeightBegin) - , m_activationFunction (_activationFunction) - , m_isInputLayer (false) - , m_hasWeights (true) - , m_hasGradients (false) - , m_eModeOutput (eModeOutput) - { - m_values.assign (_size, 0); - } - - - - typename LayerData::container_type LayerData::computeProbabilities () - { - container_type probabilitiesContainer; - if (TMVA::DNN::isFlagSet (ModeOutputValues::SIGMOID, m_eModeOutput)) + int randomInt (int maxValue) + { + static std::default_random_engine generator; + std::uniform_int_distribution distribution(0,maxValue-1); + return distribution(generator); + } + + + double studenttDouble (double distributionParameter) + { + static std::default_random_engine generator; + std::student_t_distribution distribution (distributionParameter); + return distribution (generator); + } + + + LayerData::LayerData (size_t inputSize) + : m_hasDropOut (false) + , m_isInputLayer (true) + , m_hasWeights (false) + , m_hasGradients (false) + , m_eModeOutput (ModeOutputValues::DIRECT) + { + m_size = inputSize; + m_deltas.assign (m_size, 0); + } + + + + LayerData::LayerData (const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput) + : m_hasDropOut (false) + , m_isInputLayer (true) + , m_hasWeights (false) + , m_hasGradients (false) + , m_eModeOutput (eModeOutput) + { + m_itInputBegin = itInputBegin; + m_itInputEnd = itInputEnd; + m_size = std::distance (itInputBegin, itInputEnd); + m_deltas.assign (m_size, 0); + } + + + + + LayerData::LayerData (size_t _size, + int itWeightBegin, + int itGradientBegin, + std::shared_ptr> _activationFunction, + std::shared_ptr> _inverseActivationFunction, + ModeOutputValues eModeOutput) + : m_size (_size) + , m_hasDropOut (false) + , m_itConstWeightBegin (itWeightBegin) + , m_itGradientBegin (itGradientBegin) + , m_activationFunction (_activationFunction) + , m_inverseActivationFunction (_inverseActivationFunction) + , m_isInputLayer (false) + , m_hasWeights (true) + , m_hasGradients (true) + , m_eModeOutput (eModeOutput) + { + m_values.assign (_size, 0); + m_deltas.assign (_size, 0); + m_valueGradients.assign 
(_size, 0); + } + + + + + LayerData::LayerData (size_t _size, int itWeightBegin, + std::shared_ptr> _activationFunction, + ModeOutputValues eModeOutput) + : m_size (_size) + , m_hasDropOut (false) + , m_itConstWeightBegin (itWeightBegin) + , m_activationFunction (_activationFunction) + , m_inverseActivationFunction () + , m_isInputLayer (false) + , m_hasWeights (true) + , m_hasGradients (false) + , m_eModeOutput (eModeOutput) + { + m_values.assign (_size, 0); + } + + + + typename LayerData::container_type LayerData::computeProbabilities () const + { + container_type probabilitiesContainer; + if (TMVA::DNN::isFlagSet (ModeOutputValues::SIGMOID, m_eModeOutput)) { - std::transform (begin (m_values), end (m_values), std::back_inserter (probabilitiesContainer), (*Sigmoid.get ())); + std::transform (begin (m_values), end (m_values), std::back_inserter (probabilitiesContainer), (*Sigmoid.get ())); } - else if (TMVA::DNN::isFlagSet (ModeOutputValues::SOFTMAX, m_eModeOutput)) + else if (TMVA::DNN::isFlagSet (ModeOutputValues::SOFTMAX, m_eModeOutput)) { - double sum = 0; - probabilitiesContainer = m_values; - std::for_each (begin (probabilitiesContainer), end (probabilitiesContainer), [&sum](double& p){ p = std::exp (p); sum += p; }); - if (sum != 0) - std::for_each (begin (probabilitiesContainer), end (probabilitiesContainer), [sum ](double& p){ p /= sum; }); + double sum = 0; + probabilitiesContainer = m_values; + std::for_each (begin (probabilitiesContainer), end (probabilitiesContainer), [&sum](double& p){ p = std::exp (p); sum += p; }); + if (sum != 0) + std::for_each (begin (probabilitiesContainer), end (probabilitiesContainer), [sum ](double& p){ p /= sum; }); } - else + else { - probabilitiesContainer.assign (begin (m_values), end (m_values)); + probabilitiesContainer.assign (begin (m_values), end (m_values)); } - return probabilitiesContainer; - } + return probabilitiesContainer; + } - Layer::Layer (size_t _numNodes, EnumFunction _activationFunction, ModeOutputValues eModeOutputValues) - : m_numNodes (_numNodes) - , m_eModeOutputValues (eModeOutputValues) - , m_activationFunctionType (_activationFunction) - { - for (size_t iNode = 0; iNode < _numNodes; ++iNode) + Layer::Layer (size_t _numNodes, EnumFunction _activationFunction, ModeOutputValues eModeOutputValues) + : m_numNodes (_numNodes) + , m_eModeOutputValues (eModeOutputValues) + , m_activationFunctionType (_activationFunction) + { + for (size_t iNode = 0; iNode < _numNodes; ++iNode) { - auto actFnc = Linear; - auto invActFnc = InvLinear; - switch (_activationFunction) - { - case EnumFunction::ZERO: - actFnc = ZeroFnc; - invActFnc = ZeroFnc; - break; - case EnumFunction::LINEAR: - actFnc = Linear; - invActFnc = InvLinear; - break; - case EnumFunction::TANH: - actFnc = Tanh; - invActFnc = InvTanh; - break; - case EnumFunction::RELU: - actFnc = ReLU; - invActFnc = InvReLU; - break; - case EnumFunction::SYMMRELU: - actFnc = SymmReLU; - invActFnc = InvSymmReLU; - break; - case EnumFunction::TANHSHIFT: - actFnc = TanhShift; - invActFnc = InvTanhShift; - break; - case EnumFunction::SOFTSIGN: - actFnc = SoftSign; - invActFnc = InvSoftSign; - break; - case EnumFunction::SIGMOID: - actFnc = Sigmoid; - invActFnc = InvSigmoid; - break; - case EnumFunction::GAUSS: - actFnc = Gauss; - invActFnc = InvGauss; - break; - case EnumFunction::GAUSSCOMPLEMENT: - actFnc = GaussComplement; - invActFnc = InvGaussComplement; - break; - } - m_activationFunction = actFnc; - m_inverseActivationFunction = invActFnc; + auto actFnc = Linear; + auto invActFnc = 
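// --- Illustrative sketch, not part of the patch: computeProbabilities above maps raw node
// values to probabilities,
//   SIGMOID : p   = 1 / (1 + exp(-v))
//   SOFTMAX : p_i = exp(v_i) / sum_j exp(v_j)
// For very large |v| a plain softmax can overflow; a common, numerically safer variant
// subtracts the maximum value before exponentiating, shown below for illustration only.
#include <algorithm>
#include <cmath>
#include <vector>

inline std::vector<double> stableSoftmaxSketch (std::vector<double> values)
{
   if (values.empty ()) return values;
   const double vMax = *std::max_element (values.begin (), values.end ());
   double sum = 0.0;
   for (double& v : values) { v = std::exp (v - vMax); sum += v; }
   for (double& v : values) v /= sum;
   return values;
}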
InvLinear; + switch (_activationFunction) + { + case EnumFunction::ZERO: + actFnc = ZeroFnc; + invActFnc = ZeroFnc; + break; + case EnumFunction::LINEAR: + actFnc = Linear; + invActFnc = InvLinear; + break; + case EnumFunction::TANH: + actFnc = Tanh; + invActFnc = InvTanh; + break; + case EnumFunction::RELU: + actFnc = ReLU; + invActFnc = InvReLU; + break; + case EnumFunction::SYMMRELU: + actFnc = SymmReLU; + invActFnc = InvSymmReLU; + break; + case EnumFunction::TANHSHIFT: + actFnc = TanhShift; + invActFnc = InvTanhShift; + break; + case EnumFunction::SOFTSIGN: + actFnc = SoftSign; + invActFnc = InvSoftSign; + break; + case EnumFunction::SIGMOID: + actFnc = Sigmoid; + invActFnc = InvSigmoid; + break; + case EnumFunction::GAUSS: + actFnc = Gauss; + invActFnc = InvGauss; + break; + case EnumFunction::GAUSSCOMPLEMENT: + actFnc = GaussComplement; + invActFnc = InvGaussComplement; + break; + } + m_activationFunction = actFnc; + m_inverseActivationFunction = invActFnc; } - } + } @@ -202,40 +218,39 @@ namespace TMVA - Settings::Settings (TString name, - size_t _convergenceSteps, size_t _batchSize, size_t _testRepetitions, - double _factorWeightDecay, EnumRegularization eRegularization, - MinimizerType _eMinimizerType, double _learningRate, - double _momentum, int _repetitions, bool _useMultithreading, - bool _doBatchNormalization) - : m_timer (100, name) - , m_minProgress (0) - , m_maxProgress (100) - , m_convergenceSteps (_convergenceSteps) - , m_batchSize (_batchSize) - , m_testRepetitions (_testRepetitions) - , m_factorWeightDecay (_factorWeightDecay) - , count_E (0) - , count_dE (0) - , count_mb_E (0) - , count_mb_dE (0) - , m_regularization (eRegularization) - , fLearningRate (_learningRate) - , fMomentum (_momentum) - , fRepetitions (_repetitions) - , fMinimizerType (_eMinimizerType) - , m_convergenceCount (0) - , m_maxConvergenceCount (0) - , m_minError (1e10) - , m_useMultithreading (_useMultithreading) - , m_doBatchNormalization (_doBatchNormalization) - , fMonitoring (NULL) - { - } + Settings::Settings (TString name, + size_t _convergenceSteps, size_t _batchSize, size_t _testRepetitions, + double _factorWeightDecay, int _bucketSize, EnumRegularization eRegularization, + MinimizerType _eMinimizerType, double _learningRate, + double _momentum, int _repetitions, bool _useMultithreading) + : m_timer (100, name) + , m_minProgress (0) + , m_maxProgress (100) + , m_convergenceSteps (_convergenceSteps) + , m_batchSize (_batchSize) + , m_testRepetitions (_testRepetitions) + , m_factorWeightDecay (_factorWeightDecay) + , count_E (0) + , count_dE (0) + , count_mb_E (0) + , count_mb_dE (0) + , m_regularization (eRegularization) + , fLearningRate (_learningRate) + , fMomentum (_momentum) + , fRepetitions (_repetitions) + , fMinimizerType (_eMinimizerType) + , m_convergenceCount (0) + , m_maxConvergenceCount (0) + , m_minError (1e10) + , m_useMultithreading (_useMultithreading) + , fMonitoring (NULL) + , fBucketSize (_bucketSize) + { + } - Settings::~Settings () - { - } + Settings::~Settings () + { + } @@ -250,260 +265,260 @@ namespace TMVA - /** \brief action to be done when the training cycle is started (e.g. update some monitoring output) - * - */ - void ClassificationSettings::startTrainCycle () - { - if (fMonitoring) + /** \brief action to be done when the training cycle is started (e.g. 
update some monitoring output) + * + */ + void ClassificationSettings::startTrainCycle () + { + if (fMonitoring) { - create ("ROC", 100, 0, 1, 100, 0, 1); - create ("Significance", 100, 0, 1, 100, 0, 3); - create ("OutputSig", 100, 0, 1); - create ("OutputBkg", 100, 0, 1); - fMonitoring->ProcessEvents (); + create ("ROC", 100, 0, 1, 100, 0, 1); + create ("Significance", 100, 0, 1, 100, 0, 3); + create ("OutputSig", 100, 0, 1); + create ("OutputBkg", 100, 0, 1); + fMonitoring->ProcessEvents (); } - } - - /** \brief action to be done when the training cycle is ended (e.g. update some monitoring output) - * - */ - void ClassificationSettings::endTrainCycle (double /*error*/) - { - if (fMonitoring) fMonitoring->ProcessEvents (); - } - - /** \brief action to be done after the computation of a test sample (e.g. update some monitoring output) - * - */ - void ClassificationSettings::testSample (double /*error*/, double output, double target, double weight) - { + } + + /** \brief action to be done when the training cycle is ended (e.g. update some monitoring output) + * + */ + void ClassificationSettings::endTrainCycle (double /*error*/) + { + if (fMonitoring) fMonitoring->ProcessEvents (); + } + + /** \brief action to be done after the computation of a test sample (e.g. update some monitoring output) + * + */ + void ClassificationSettings::testSample (double /*error*/, double output, double target, double weight) + { - m_output.push_back (output); - m_targets.push_back (target); - m_weights.push_back (weight); - } - - - /** \brief action to be done when the test cycle is started (e.g. update some monitoring output) - * - */ - void ClassificationSettings::startTestCycle () - { - m_output.clear (); - m_targets.clear (); - m_weights.clear (); - } - - /** \brief action to be done when the training cycle is ended (e.g. update some monitoring output) - * - */ - void ClassificationSettings::endTestCycle () - { - if (m_output.empty ()) - return; - double minVal = *std::min_element (begin (m_output), end (m_output)); - double maxVal = *std::max_element (begin (m_output), end (m_output)); - const size_t numBinsROC = 1000; - const size_t numBinsData = 100; - - std::vector truePositives (numBinsROC+1, 0); - std::vector falsePositives (numBinsROC+1, 0); - std::vector trueNegatives (numBinsROC+1, 0); - std::vector falseNegatives (numBinsROC+1, 0); - - std::vector x (numBinsData, 0); - std::vector datSig (numBinsData+1, 0); - std::vector datBkg (numBinsData+1, 0); - - double binSizeROC = (maxVal - minVal)/(double)numBinsROC; - double binSizeData = (maxVal - minVal)/(double)numBinsData; - - double sumWeightsSig = 0.0; - double sumWeightsBkg = 0.0; - - for (size_t b = 0; b < numBinsData; ++b) + m_output.push_back (output); + m_targets.push_back (target); + m_weights.push_back (weight); + } + + + /** \brief action to be done when the test cycle is started (e.g. update some monitoring output) + * + */ + void ClassificationSettings::startTestCycle () + { + m_output.clear (); + m_targets.clear (); + m_weights.clear (); + } + + /** \brief action to be done when the training cycle is ended (e.g. 
update some monitoring output) + * + */ + void ClassificationSettings::endTestCycle () + { + if (m_output.empty ()) + return; + double minVal = *std::min_element (begin (m_output), end (m_output)); + double maxVal = *std::max_element (begin (m_output), end (m_output)); + const size_t numBinsROC = 1000; + const size_t numBinsData = 100; + + std::vector truePositives (numBinsROC+1, 0); + std::vector falsePositives (numBinsROC+1, 0); + std::vector trueNegatives (numBinsROC+1, 0); + std::vector falseNegatives (numBinsROC+1, 0); + + std::vector x (numBinsData, 0); + std::vector datSig (numBinsData+1, 0); + std::vector datBkg (numBinsData+1, 0); + + double binSizeROC = (maxVal - minVal)/(double)numBinsROC; + double binSizeData = (maxVal - minVal)/(double)numBinsData; + + double sumWeightsSig = 0.0; + double sumWeightsBkg = 0.0; + + for (size_t b = 0; b < numBinsData; ++b) { - double binData = minVal + b*binSizeData; - x.at (b) = binData; + double binData = minVal + b*binSizeData; + x.at (b) = binData; } - if (fabs(binSizeROC) < 0.0001) - return; + if (fabs(binSizeROC) < 0.0001) + return; - for (size_t i = 0, iEnd = m_output.size (); i < iEnd; ++i) + for (size_t i = 0, iEnd = m_output.size (); i < iEnd; ++i) { - double val = m_output.at (i); - double truth = m_targets.at (i); - double weight = m_weights.at (i); + double val = m_output.at (i); + double truth = m_targets.at (i); + double weight = m_weights.at (i); - bool isSignal = (truth > 0.5 ? true : false); + bool isSignal = (truth > 0.5 ? true : false); - if (m_sumOfSigWeights != 0 && m_sumOfBkgWeights != 0) - { - if (isSignal) + if (m_sumOfSigWeights != 0 && m_sumOfBkgWeights != 0) + { + if (isSignal) weight *= m_sumOfSigWeights; - else + else weight *= m_sumOfBkgWeights; - } - - size_t binROC = (val-minVal)/binSizeROC; - size_t binData = (val-minVal)/binSizeData; - - if (isSignal) - { - for (size_t n = 0; n <= binROC; ++n) - { - truePositives.at (n) += weight; - } - for (size_t n = binROC+1; n < numBinsROC; ++n) - { - falseNegatives.at (n) += weight; - } - - datSig.at (binData) += weight; - sumWeightsSig += weight; - } - else - { - for (size_t n = 0; n <= binROC; ++n) - { - falsePositives.at (n) += weight; - } - for (size_t n = binROC+1; n < numBinsROC; ++n) - { - trueNegatives.at (n) += weight; - } - - datBkg.at (binData) += weight; - sumWeightsBkg += weight; - } + } + + size_t binROC = (val-minVal)/binSizeROC; + size_t binData = (val-minVal)/binSizeData; + + if (isSignal) + { + for (size_t n = 0; n <= binROC; ++n) + { + truePositives.at (n) += weight; + } + for (size_t n = binROC+1; n < numBinsROC; ++n) + { + falseNegatives.at (n) += weight; + } + + datSig.at (binData) += weight; + sumWeightsSig += weight; + } + else + { + for (size_t n = 0; n <= binROC; ++n) + { + falsePositives.at (n) += weight; + } + for (size_t n = binROC+1; n < numBinsROC; ++n) + { + trueNegatives.at (n) += weight; + } + + datBkg.at (binData) += weight; + sumWeightsBkg += weight; + } } - std::vector sigEff; - std::vector backRej; + std::vector sigEff; + std::vector backRej; - double bestSignificance = 0; - double bestCutSignificance = 0; + double bestSignificance = 0; + double bestCutSignificance = 0; - double numEventsScaleFactor = 1.0; - if (m_scaleToNumEvents > 0) + double numEventsScaleFactor = 1.0; + if (m_scaleToNumEvents > 0) { - size_t numEvents = m_output.size (); - numEventsScaleFactor = double (m_scaleToNumEvents)/double (numEvents); + size_t numEvents = m_output.size (); + numEventsScaleFactor = double (m_scaleToNumEvents)/double (numEvents); } - clear 
("ROC"); - clear ("Significance"); + clear ("ROC"); + clear ("Significance"); - for (size_t i = 0; i < numBinsROC; ++i) + for (size_t i = 0; i < numBinsROC; ++i) { - double tp = truePositives.at (i) * numEventsScaleFactor; - double fp = falsePositives.at (i) * numEventsScaleFactor; - double tn = trueNegatives.at (i) * numEventsScaleFactor; - double fn = falseNegatives.at (i) * numEventsScaleFactor; + double tp = truePositives.at (i) * numEventsScaleFactor; + double fp = falsePositives.at (i) * numEventsScaleFactor; + double tn = trueNegatives.at (i) * numEventsScaleFactor; + double fn = falseNegatives.at (i) * numEventsScaleFactor; - double seff = (tp+fn == 0.0 ? 1.0 : (tp / (tp+fn))); - double brej = (tn+fp == 0.0 ? 0.0 : (tn / (tn+fp))); + double seff = (tp+fn == 0.0 ? 1.0 : (tp / (tp+fn))); + double brej = (tn+fp == 0.0 ? 0.0 : (tn / (tn+fp))); - sigEff.push_back (seff); - backRej.push_back (brej); + sigEff.push_back (seff); + backRej.push_back (brej); - // m_histROC->Fill (seff, brej); - addPoint ("ROC", seff, brej); // x, y + // m_histROC->Fill (seff, brej); + addPoint ("ROC", seff, brej); // x, y - double currentCut = (i * binSizeROC)+minVal; + double currentCut = (i * binSizeROC)+minVal; - double sig = tp; - double bkg = fp; - double significance = sig / sqrt (sig + bkg); - if (significance > bestSignificance) - { - bestSignificance = significance; - bestCutSignificance = currentCut; - } + double sig = tp; + double bkg = fp; + double significance = sig / sqrt (sig + bkg); + if (significance > bestSignificance) + { + bestSignificance = significance; + bestCutSignificance = currentCut; + } - addPoint ("Significance", currentCut, significance); - // m_histSignificance->Fill (currentCut, significance); + addPoint ("Significance", currentCut, significance); + // m_histSignificance->Fill (currentCut, significance); } - m_significances.push_back (bestSignificance); - static size_t testCycle = 0; + m_significances.push_back (bestSignificance); + static size_t testCycle = 0; - clear ("OutputSig"); - clear ("OutputBkg"); - for (size_t i = 0; i < numBinsData; ++i) + clear ("OutputSig"); + clear ("OutputBkg"); + for (size_t i = 0; i < numBinsData; ++i) { - addPoint ("OutputSig", x.at (i), datSig.at (i)/sumWeightsSig); - addPoint ("OutputBkg", x.at (i), datBkg.at (i)/sumWeightsBkg); - // m_histOutputSignal->Fill (x.at (i), datSig.at (1)/sumWeightsSig); - // m_histOutputBackground->Fill (x.at (i), datBkg.at (1)/sumWeightsBkg); + addPoint ("OutputSig", x.at (i), datSig.at (i)/sumWeightsSig); + addPoint ("OutputBkg", x.at (i), datBkg.at (i)/sumWeightsBkg); + // m_histOutputSignal->Fill (x.at (i), datSig.at (1)/sumWeightsSig); + // m_histOutputBackground->Fill (x.at (i), datBkg.at (1)/sumWeightsBkg); } - ++testCycle; + ++testCycle; - if (fMonitoring) + if (fMonitoring) { - plot ("ROC", "", 2, kRed); - plot ("Significance", "", 3, kRed); - plot ("OutputSig", "", 4, kRed); - plot ("OutputBkg", "same", 4, kBlue); - fMonitoring->ProcessEvents (); + plot ("ROC", "", 2, kRed); + plot ("Significance", "", 3, kRed); + plot ("OutputSig", "", 4, kRed); + plot ("OutputBkg", "same", 4, kBlue); + fMonitoring->ProcessEvents (); } - m_cutValue = bestCutSignificance; - } + m_cutValue = bestCutSignificance; + } - /** \brief check for convergence - * - */ - bool Settings::hasConverged (double testError) - { - // std::cout << "check convergence; minError " << m_minError << " current " << testError - // << " current convergence count " << m_convergenceCount << std::endl; - if (testError < m_minError*0.999) + /** \brief 
-    /** \brief check for convergence
-     *
-     */
-    bool Settings::hasConverged (double testError)
-    {
-        // std::cout << "check convergence; minError " << m_minError << " current " << testError
-        //           << " current convergence count " << m_convergenceCount << std::endl;
-        if (testError < m_minError*0.999)
+   /** \brief check for convergence
+    *
+    */
+   bool Settings::hasConverged (double testError)
+   {
+       // std::cout << "check convergence; minError " << m_minError << " current " << testError
+       //           << " current convergence count " << m_convergenceCount << std::endl;
+       if (testError < m_minError*0.999)
        {
-            m_convergenceCount = 0;
-            m_minError = testError;
+           m_convergenceCount = 0;
+           m_minError = testError;
        }
-        else
+       else
        {
-            ++m_convergenceCount;
-            m_maxConvergenceCount = std::max (m_convergenceCount, m_maxConvergenceCount);
+           ++m_convergenceCount;
+           m_maxConvergenceCount = std::max (m_convergenceCount, m_maxConvergenceCount);
        }
-        if (m_convergenceCount >= convergenceSteps () || testError <= 0)
-            return true;
+       if (m_convergenceCount >= convergenceSteps () || testError <= 0)
+           return true;
-        return false;
-    }
+       return false;
+   }
-    /** \brief set the weight sums to be scaled to (preparations for monitoring output)
-     *
-     */
-    void ClassificationSettings::setWeightSums (double sumOfSigWeights, double sumOfBkgWeights)
-    {
-        m_sumOfSigWeights = sumOfSigWeights; m_sumOfBkgWeights = sumOfBkgWeights;
-    }
+   /** \brief set the weight sums to be scaled to (preparations for monitoring output)
+    *
+    */
+   void ClassificationSettings::setWeightSums (double sumOfSigWeights, double sumOfBkgWeights)
+   {
+       m_sumOfSigWeights = sumOfSigWeights; m_sumOfBkgWeights = sumOfBkgWeights;
+   }
-    /** \brief preparation for monitoring output
-     *
-     */
-    void ClassificationSettings::setResultComputation (
-        std::string _fileNameNetConfig,
-        std::string _fileNameResult,
-        std::vector<Pattern>* _resultPatternContainer)
-    {
-        m_pResultPatternContainer = _resultPatternContainer;
-        m_fileNameResult = _fileNameResult;
-        m_fileNameNetConfig = _fileNameNetConfig;
-    }
+   /** \brief preparation for monitoring output
+    *
+    */
+   void ClassificationSettings::setResultComputation (
+       std::string _fileNameNetConfig,
+       std::string _fileNameResult,
+       std::vector<Pattern>* _resultPatternContainer)
+   {
+       m_pResultPatternContainer = _resultPatternContainer;
+       m_fileNameResult = _fileNameResult;
+       m_fileNameNetConfig = _fileNameNetConfig;
+   }
@@ -512,39 +527,51 @@ namespace TMVA
-    /** \brief compute the number of weights given the size of the input layer
-     *
-     */
-    size_t Net::numWeights (size_t trainingStartLayer) const
-    {
-        size_t num (0);
-        size_t index (0);
-        size_t prevNodes (inputSize ());
-        for (auto& layer : m_layers)
+   /** \brief compute the number of weights given the size of the input layer
+    *
+    */
+   size_t Net::numWeights (size_t trainingStartLayer) const
+   {
+       size_t num (0);
+       size_t index (0);
+       size_t prevNodes (inputSize ());
+       for (auto& layer : m_layers)
        {
-            if (index >= trainingStartLayer)
-                num += layer.numWeights (prevNodes);
-            prevNodes = layer.numNodes ();
-            ++index;
+           if (index >= trainingStartLayer)
+               num += layer.numWeights (prevNodes);
+           prevNodes = layer.numNodes ();
+           ++index;
        }
-        return num;
-    }
+       return num;
+   }
+   size_t Net::numNodes (size_t trainingStartLayer) const
+   {
+       size_t num (0);
+       size_t index (0);
+       for (auto& layer : m_layers)
+       {
+           if (index >= trainingStartLayer)
+               num += layer.numNodes ();
+           ++index;
+       }
+       return num;
+   }
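Net::numWeights above simply sums whatever Layer::numWeights(prevNodes) reports, layer by layer, starting from the input size, and the new Net::numNodes does the same for node counts. For a plain fully connected layout (ignoring any bias handling hidden inside Layer, which is not visible in this hunk) that reduces to products of consecutive layer sizes, as in this hypothetical sketch:

#include <cstdio>
#include <vector>

int main ()
{
    std::vector<std::size_t> nodes = {4, 8, 8, 1};   // input layer followed by three layers

    std::size_t numWeights = 0, numNodes = 0;
    for (std::size_t i = 1; i < nodes.size (); ++i)
    {
        numWeights += nodes.at (i-1) * nodes.at (i); // fully connected: prevNodes * thisNodes
        numNodes   += nodes.at (i);                  // counts only the non-input layers
    }
    std::printf ("weights: %zu, nodes: %zu\n", numWeights, numNodes);   // 4*8 + 8*8 + 8*1 = 104; 17 nodes
    return 0;
}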
-    /** \brief prepare the drop-out container given the provided drop-fractions
-     *
-     */
-    void Net::fillDropContainer (DropContainer& dropContainer, double dropFraction, size_t numNodes) const
-    {
-        size_t numDrops = dropFraction * numNodes;
-        if (numDrops >= numNodes) // maintain at least one node
-            numDrops = numNodes - 1;
-        dropContainer.insert (end (dropContainer), numNodes-numDrops, true); // add the markers for the nodes which are enabled
-        dropContainer.insert (end (dropContainer), numDrops, false); // add the markers for the disabled nodes
-        // shuffle
-        std::random_shuffle (end (dropContainer)-numNodes, end (dropContainer)); // shuffle enabled and disabled markers
-    }
+   /** \brief prepare the drop-out container given the provided drop-fractions
+    *
+    */
+   void Net::fillDropContainer (DropContainer& dropContainer, double dropFraction, size_t _numNodes) const
+   {
+       size_t numDrops = dropFraction * _numNodes;
+       if (numDrops >= _numNodes) // maintain at least one node
+           numDrops = _numNodes - 1;
+       dropContainer.insert (end (dropContainer), _numNodes-numDrops, true); // add the markers for the nodes which are enabled
+       dropContainer.insert (end (dropContainer), numDrops, false); // add the markers for the disabled nodes
+       // shuffle
+       std::random_shuffle (end (dropContainer)-_numNodes, end (dropContainer)); // shuffle enabled and disabled markers
+   }
@@ -553,6 +580,6 @@ namespace TMVA
-    }; // namespace DNN
+   }; // namespace DNN
 }; // namespace TMVA
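The drop-out mask that fillDropContainer appends for each layer is just a shuffled block of "enabled" and "disabled" flags, with at least one node kept active. A minimal standalone sketch of the same idea follows; it uses std::shuffle (std::random_shuffle, used in the patch, is deprecated in later C++ standards), and the layer size and drop fraction are made-up values rather than anything taken from the patch:

#include <algorithm>
#include <cstdio>
#include <random>
#include <vector>

int main ()
{
    const std::size_t numNodes     = 10;
    const double      dropFraction = 0.3;

    std::size_t numDrops = static_cast<std::size_t> (dropFraction * numNodes);
    if (numDrops >= numNodes)            // keep at least one active node
        numDrops = numNodes - 1;

    std::vector<char> dropMask (numNodes - numDrops, 1);       // markers for enabled nodes
    dropMask.insert (dropMask.end (), numDrops, 0);            // markers for disabled nodes

    std::mt19937 rng (42);
    std::shuffle (dropMask.begin (), dropMask.end (), rng);    // randomise which nodes are dropped

    for (char enabled : dropMask)
        std::printf ("%c", enabled ? '1' : '0');
    std::printf ("\n");
    return 0;
}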