From b191f1bac12f481a6a1af5c82a40ce79ff159445 Mon Sep 17 00:00:00 2001 From: mammadhajili Date: Tue, 15 Aug 2017 15:45:57 +0200 Subject: [PATCH 1/4] TMVA MultiProcessing --- tmva/tmva/inc/LinkDef4.h | 5 + tmva/tmva/inc/TMVA/Config.h | 5 + tmva/tmva/inc/TMVA/DataLoader.h | 6 +- tmva/tmva/src/Config.cxx | 2 +- tmva/tmva/src/CrossValidation.cxx | 60 ++- tmva/tmva/src/DataLoader.cxx | 42 +- tmva/tmva/src/HyperParameterOptimisation.cxx | 44 ++- tmva/tmva/src/VariableImportance.cxx | 395 ++++++++++++------- 8 files changed, 380 insertions(+), 179 deletions(-) diff --git a/tmva/tmva/inc/LinkDef4.h b/tmva/tmva/inc/LinkDef4.h index 7ac729c9dbb74..d8ccf917463f0 100644 --- a/tmva/tmva/inc/LinkDef4.h +++ b/tmva/tmva/inc/LinkDef4.h @@ -43,4 +43,9 @@ #pragma link C++ function TMVA::CreateVariableTransform; #pragma link C++ function TMVA::DataLoaderCopy; +#pragma link C++ function TMVA::DataLoaderCopy; + +#pragma link C++ class std::map+; + + #endif diff --git a/tmva/tmva/inc/TMVA/Config.h b/tmva/tmva/inc/TMVA/Config.h index 48734c8a156c5..115262caa1c6e 100644 --- a/tmva/tmva/inc/TMVA/Config.h +++ b/tmva/tmva/inc/TMVA/Config.h @@ -64,6 +64,9 @@ namespace TMVA { Bool_t DrawProgressBar() const { return fDrawProgressBar; } void SetDrawProgressBar( Bool_t d ) { fDrawProgressBar = d; } + + UInt_t NWorkers() const {return fNWorkers; } + void SetNWorkers (UInt_t n) {fNWorkers = n; } public: @@ -118,11 +121,13 @@ namespace TMVA { std::atomic fSilent; // no output at all std::atomic fWriteOptionsReference; // if set true: Configurable objects write file with option reference std::atomic fDrawProgressBar; // draw progress bar to indicate training evolution + std::atomic fNWorkers; // number of workers in multiprocessing parallelization #else Bool_t fUseColoredConsole; // coloured standard output Bool_t fSilent; // no output at all Bool_t fWriteOptionsReference; // if set true: Configurable objects write file with option reference Bool_t fDrawProgressBar; // draw progress bar to indicate training evolution + UInt_t fNWorkers; #endif mutable MsgLogger* fLogger; // message logger MsgLogger& Log() const { return *fLogger; } diff --git a/tmva/tmva/inc/TMVA/DataLoader.h b/tmva/tmva/inc/TMVA/DataLoader.h index d0d206dee25f8..7d5be1e467812 100644 --- a/tmva/tmva/inc/TMVA/DataLoader.h +++ b/tmva/tmva/inc/TMVA/DataLoader.h @@ -33,12 +33,14 @@ #include #include #include "TCut.h" +#include #include "TMVA/Factory.h" #include "TMVA/Types.h" #include "TMVA/DataSet.h" +#include "TFile.h" -class TFile; +//class TFile; class TTree; class TDirectory; class TH2; @@ -226,6 +228,8 @@ namespace TMVA { ClassDef(DataLoader,3); }; void DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src); + std::vector < std::shared_ptr > DataLoaderCopyMP(TMVA::DataLoader *des, TMVA::DataLoader *src); + void DataLoaderCopyMPCloseFiles(std::vector< std::shared_ptr > files); } // namespace TMVA #endif diff --git a/tmva/tmva/src/Config.cxx b/tmva/tmva/src/Config.cxx index f7dd3ed340d22..c3ce9110695c4 100644 --- a/tmva/tmva/src/Config.cxx +++ b/tmva/tmva/src/Config.cxx @@ -57,6 +57,7 @@ TMVA::Config::Config() : fSilent ( kFALSE ), fWriteOptionsReference( kFALSE ), fDrawProgressBar ( kFALSE ), + fNWorkers ( 1 ), fLogger ( new MsgLogger("Config") ) { // plotting @@ -113,4 +114,3 @@ TMVA::Config& TMVA::Config::Instance() return fgConfigPtr ? *fgConfigPtr :*(fgConfigPtr = new Config()); #endif } - diff --git a/tmva/tmva/src/CrossValidation.cxx b/tmva/tmva/src/CrossValidation.cxx index 10fa1934d12f3..ce9b5a9705e83 100644 --- a/tmva/tmva/src/CrossValidation.cxx +++ b/tmva/tmva/src/CrossValidation.cxx @@ -11,6 +11,8 @@ #include "TMVA/ResultsClassification.h" #include "TMVA/tmvaglob.h" #include "TMVA/Types.h" +#include "ROOT/TProcessExecutor.hxx" + #include "TSystem.h" #include "TAxis.h" @@ -20,6 +22,10 @@ #include #include +using namespace std; + +//const UInt_t nWorkers = 2U; + /*! \class TMVA::CrossValidationResult \ingroup TMVA @@ -126,16 +132,17 @@ void TMVA::CrossValidation::Evaluate() fFoldStatus=kTRUE; } - // Process K folds - for(UInt_t i=0; iPrepareFoldDataSet(i, TMVA::Types::kTesting); - MethodBase* smethod = fClassifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); + foldTitle += workerID+1; + auto classifier = std::unique_ptr(new TMVA::Factory("CrossValidation","!V:!ROC:Silent:!ModelPersistence:!Color:!DrawProgressBar:AnalysisType=Classification")); + fDataLoader->PrepareFoldDataSet(workerID, TMVA::Types::kTesting); + MethodBase* smethod = classifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); // Train method Event::SetIsTraining(kTRUE); @@ -147,10 +154,11 @@ void TMVA::CrossValidation::Evaluate() smethod->TestClassification(); // Store results - fResults.fROCs[i] = fClassifier->GetROCIntegral(fDataLoader->GetName(),methodTitle); + auto res = classifier->GetROCIntegral(fDataLoader->GetName(),methodTitle); + //fResults.fROCs[workerID] = classifier->GetROCIntegral(fDataLoader->GetName(),methodTitle); - TGraph* gr = fClassifier->GetROCCurve(fDataLoader->GetName(), methodTitle, true); - gr->SetLineColor(i+1); + TGraph* gr = classifier->GetROCCurve(fDataLoader->GetName(), methodTitle, true); + gr->SetLineColor(workerID+1); gr->SetLineWidth(2); gr->SetTitle(foldTitle.Data()); fResults.fROCCurves->Add(gr); @@ -159,7 +167,7 @@ void TMVA::CrossValidation::Evaluate() fResults.fSeps.push_back(smethod->GetSeparation()); Double_t err; - fResults.fEff01s.push_back(smethod->GetEfficiency("Efficiency:0.01",Types::kTesting, err)); + fResults.fEff01s.push_back(smethod->GetEfficiency("Efficiency:0.01",Types::kTesting,err)); fResults.fEff10s.push_back(smethod->GetEfficiency("Efficiency:0.10",Types::kTesting,err)); fResults.fEff30s.push_back(smethod->GetEfficiency("Efficiency:0.30",Types::kTesting,err)); fResults.fEffAreas.push_back(smethod->GetEfficiency("" ,Types::kTesting,err)); @@ -170,9 +178,33 @@ void TMVA::CrossValidation::Evaluate() // Clean-up for this fold smethod->Data()->DeleteResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification); smethod->Data()->DeleteResults(smethod->GetMethodName(), Types::kTraining, Types::kClassification); - fClassifier->DeleteAllMethods(); - fClassifier->fMethodsMap.clear(); - } + classifier->DeleteAllMethods(); + classifier->fMethodsMap.clear(); + + return make_pair(res, workerID); + }; + + vector< pair < double, UInt_t > > res; + + auto nWorkers = TMVA::gConfig().NWorkers(); + + if(nWorkers > 1) { + ROOT::TProcessExecutor workers(nWorkers); + res = workers.Map(workItem, ROOT::TSeqI(fNumFolds)); + } + + else { + + for(UInt_t i = 0; i < fNumFolds; ++ i) { + auto res_pair = workItem(i); + res.push_back(res_pair); + } + + } + + for(auto res_pair: res) { + fResults.fROCs[res_pair.second] = res_pair.first; + } TMVA::gConfig().SetSilent(kFALSE); Log() << kINFO << "Evaluation done." << Endl; diff --git a/tmva/tmva/src/DataLoader.cxx b/tmva/tmva/src/DataLoader.cxx index 55c294a9b4af1..1754d6ceb6694 100644 --- a/tmva/tmva/src/DataLoader.cxx +++ b/tmva/tmva/src/DataLoader.cxx @@ -715,6 +715,7 @@ void TMVA::DataLoader::MakeKFoldDataSet(UInt_t numberFolds, bool validationSet){ void TMVA::DataLoader::PrepareFoldDataSet(UInt_t foldNumber, Types::ETreeType tt){ + UInt_t numFolds = fTrainSigEvents.size(); std::vector* tempTrain = new std::vector; @@ -853,17 +854,52 @@ TMVA::DataLoader* TMVA::DataLoader::MakeCopy(TString name) void TMVA::DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src) { + for( std::vector::const_iterator treeinfo=src->DataInput().Sbegin();treeinfo!=src->DataInput().Send();treeinfo++) { - des->AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType()); + des->AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType()); } for( std::vector::const_iterator treeinfo=src->DataInput().Bbegin();treeinfo!=src->DataInput().Bend();treeinfo++) { - des->AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType()); + des->AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType()); } } - +std::vector< std::shared_ptr > TMVA::DataLoaderCopyMP(TMVA::DataLoader *des, TMVA::DataLoader *src) { + + std::vector< std::shared_ptr > vec_files; + for(std::vector::const_iterator treeinfo_signal=src->DataInput().Sbegin(), treeinfo_back=src->DataInput().Bbegin(); + treeinfo_signal!=src->DataInput().Send(), treeinfo_back!=src->DataInput().Bend(); + treeinfo_signal++, treeinfo_back++) + { + TTree *stree = treeinfo_signal -> GetTree(); + TTree *btree = treeinfo_back -> GetTree(); + + TString sfileName = stree->GetCurrentFile()->GetName(); + TString bfileName = btree->GetCurrentFile()->GetName(); + std::shared_ptr sfile( TFile::Open(sfileName)) ; + std::shared_ptr bfile; + if (bfileName != sfileName) { + bfile = std::shared_ptr(TFile::Open(bfileName)); + } + else { + bfile = sfile; + } + TTree* signalTree = (TTree*)sfile->Get(stree->GetName()); + TTree* backgTree = (TTree*)bfile->Get(btree->GetName()); + des->AddSignalTree(signalTree); + des->AddBackgroundTree(backgTree); + + vec_files.push_back(sfile); + vec_files.push_back(bfile); + } + return vec_files; +} +void TMVA::DataLoaderCopyMPCloseFiles(std::vector > files) { + for(auto file: files) { + file->Close(); + } +} //////////////////////////////////////////////////////////////////////////////// /// returns the correlation matrix of datasets diff --git a/tmva/tmva/src/HyperParameterOptimisation.cxx b/tmva/tmva/src/HyperParameterOptimisation.cxx index 6e96c0ada093d..9ad078abe56e5 100644 --- a/tmva/tmva/src/HyperParameterOptimisation.cxx +++ b/tmva/tmva/src/HyperParameterOptimisation.cxx @@ -14,11 +14,13 @@ #include "TMultiGraph.h" #include "TString.h" #include "TSystem.h" +#include "ROOT/TProcessExecutor.hxx" #include #include #include +using namespace std; /*! \class TMVA::HyperParameterOptimisationResult \ingroup TMVA @@ -29,6 +31,8 @@ */ +//const int nWorkers = 4U; + TMVA::HyperParameterOptimisationResult::HyperParameterOptimisationResult() : fROCAVG(0.0), fROCCurves(std::make_shared()) { @@ -98,27 +102,41 @@ void TMVA::HyperParameterOptimisation::Evaluate() fFoldStatus=kTRUE; } fResults.fMethodName = methodName; + auto workItem = [&](UInt_t workerID) { + TString foldTitle = methodTitle; + foldTitle += "_opt"; + foldTitle += workerID+1; - for(UInt_t i = 0; i < fNumFolds; ++i) { - - TString foldTitle = methodTitle; - foldTitle += "_opt"; - foldTitle += i+1; + Event::SetIsTraining(kTRUE); + fDataLoader->PrepareFoldDataSet(workerID, TMVA::Types::kTraining); - Event::SetIsTraining(kTRUE); - fDataLoader->PrepareFoldDataSet(i, TMVA::Types::kTraining); + auto smethod = fClassifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); - auto smethod = fClassifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); + auto params=smethod->OptimizeTuningParameters(fFomType,fFitType); - auto params=smethod->OptimizeTuningParameters(fFomType,fFitType); - fResults.fFoldParameters.push_back(params); + //fResults.fFoldParameters.push_back(params); - smethod->Data()->DeleteResults(smethod->GetMethodName(), Types::kTraining, Types::kClassification); + smethod->Data()->DeleteResults(smethod->GetMethodName(), Types::kTraining, Types::kClassification); - fClassifier->DeleteAllMethods(); + fClassifier->DeleteAllMethods(); - fClassifier->fMethodsMap.clear(); + fClassifier->fMethodsMap.clear(); + return params; + }; + vector < map > res; + auto nWorkers = TMVA::gConfig().NWorkers(); + if(nWorkers> 1) { + ROOT::TProcessExecutor workers(nWorkers); + res = workers.Map(workItem, ROOT::TSeqI(fNumFolds)); + } + else { + for(UInt_t i = 0; i < fNumFolds; ++ i) { + res.push_back(workItem(i)); + } + } + for(auto results: res) { + fResults.fFoldParameters.push_back(results); } } diff --git a/tmva/tmva/src/VariableImportance.cxx b/tmva/tmva/src/VariableImportance.cxx index 729df32a6c0d1..52e81dc45df09 100644 --- a/tmva/tmva/src/VariableImportance.cxx +++ b/tmva/tmva/src/VariableImportance.cxx @@ -21,6 +21,7 @@ #include "TMVA/MsgLogger.h" #include "TMVA/Types.h" #include "TMVA/VarTransformHandler.h" +#include "ROOT/TProcessExecutor.hxx" #include "TAxis.h" #include "TGraph.h" @@ -29,16 +30,17 @@ #include "TRandom3.h" #include "TStyle.h" #include "TSystem.h" +#include "TFile.h" #include #include #include #include - +#include //number of bits for bitset -#define NBITS 32 - +#define NBITS 32 +using namespace std; //////////////////////////////////////////////////////////////////////////////// TMVA::VariableImportanceResult::VariableImportanceResult():fImportanceValues("VariableImportance"), @@ -219,10 +221,9 @@ void TMVA::VariableImportance::EvaluateImportanceShort() for (UInt_t index = 0; index < nbits; index++){ if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F'); } - - //Loading Dataset + //Loading Dataset DataLoaderCopy(seeddl,fDataLoader.get()); - + seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); //Booking Seed fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); @@ -235,59 +236,82 @@ void TMVA::VariableImportance::EvaluateImportanceShort() SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle); delete seeddl; + fClassifier->DeleteAllMethods(); fClassifier->fMethodsMap.clear(); - for (uint32_t i = 0; i < NBITS; ++i) { - if (x & (1 << i)) { - y = x & ~(1 << i); - std::bitset ybitset(y); - //need at least one variable - //NOTE: if subssed is zero then is the special case - //that count in xbitset is 1 - Double_t ny = log(x - y) / 0.693147; - if (y == 0) { - importances[ny] = SROC - 0.5; - continue; - } - - //creating loader for subseed - TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string()); - //adding variables from subseed - for (UInt_t index = 0; index < nbits; index++) { - if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F'); - } - - //Loading Dataset - DataLoaderCopy(subseeddl,fDataLoader.get()); - - //Booking SubSeed - fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions); - - //Train/Test/Evaluation - fClassifier->TrainAllMethods(); - fClassifier->TestAllMethods(); - fClassifier->EvaluateAllMethods(); - - //getting ROC - SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle); - importances[ny] += SROC - SSROC; - - delete subseeddl; - fClassifier->DeleteAllMethods(); - fClassifier->fMethodsMap.clear(); - } - } - Float_t normalization = 0.0; - for (UInt_t i = 0; i < nbits; i++) normalization += importances[i]; - - for(UInt_t i=0;i(varNames[i])+" % "; - } - fResults.fImportanceHist = std::shared_ptr(GetImportance(nbits,importances,varNames)); + auto workItem = [&](UInt_t workerID) { + uint32_t i = workerID; + if (x & (1 << i)) { + y = x & ~(1 << i); + std::bitset ybitset(y); + //need at least one variable + //NOTE: if subssed is zero then is the special case + //that count in xbitset is 1 + Double_t ny = log(x - y) / 0.693147; + if (y == 0) { + return make_pair(ny, 0.5); + } + + //creating loader for subseed + TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string()); + + //adding variables from subseed + for (UInt_t index = 0; index < nbits; index++) { + if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F'); + } + + //Loading Dataset + std::vector< std::shared_ptr > files = DataLoaderCopyMP(subseeddl,fDataLoader.get()); + subseeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); + //Booking SubSeed + fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions); + + //Train/Test/Evaluation + fClassifier->TrainAllMethods(); + fClassifier->TestAllMethods(); + fClassifier->EvaluateAllMethods(); + + //getting ROC + SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle); + //importances[ny] += SROC - SSROC; + + delete subseeddl; + fClassifier->DeleteAllMethods(); + fClassifier->fMethodsMap.clear(); + DataLoaderCopyMPCloseFiles(files); + + return make_pair((double)ny, (double)SSROC ); + } + + else return make_pair(-1., (double)0.); + }; + vector< pair > results; + if(TMVA::gConfig().NWorkers() > 1) { + ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); + results = workers.Map(workItem, ROOT::TSeqI(32)); + } + else { + for(int i = 0; i < 32; ++ i) { + auto res = workItem(i); + results.push_back(res); + } + } + for(auto res_pair: results) { + if(res_pair.first >= 0) + importances[res_pair.first] += SROC - res_pair.second; + } + + Float_t normalization = 0.0; + for (UInt_t i = 0; i < nbits; i++) normalization += importances[i]; + + for(UInt_t i=0;i(varNames[i])+" % "; + } + fResults.fImportanceHist = std::shared_ptr(GetImportance(nbits,importances,varNames)); } //////////////////////////////////////////////////////////////////////////////// @@ -300,7 +324,7 @@ void TMVA::VariableImportance::EvaluateImportanceRandom(UInt_t seeds) TRandom3 *rangen = new TRandom3(0); //Random Gen. - uint32_t x = 0; + //uint32_t x = 0; uint32_t y = 0; //getting number of variables and variable names from loader @@ -311,31 +335,39 @@ void TMVA::VariableImportance::EvaluateImportanceRandom(UInt_t seeds) //vector to save importances std::vector importances(nbits); - Float_t importances_norm = 0; + //Float_t importances_norm = 0; for (UInt_t i = 0; i < nbits; i++)importances[i] = 0; Float_t SROC, SSROC; //computed ROC value for every Seed and SubSeed - x = range; - - for (UInt_t n = 0; n < seeds; n++) { - x = rangen -> Integer(range); - - std::bitset xbitset(x); - if (x == 0) continue; //dataloader need at least one variable + //x = range; + //int cnt = 0; + std::unordered_mapused; + auto workItem = [&](UInt_t workerID) { + while(true) { + workerID = rangen -> Integer(range); + if(!used[workerID] && workerID != 0) break; + } + std::bitset xbitset(workerID); //dataloader need at least one variable - //creating loader for seed + used[workerID] = 1; + //creating loader for seed TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string()); - - //adding variables from seed + //adding variables from seed for (UInt_t index = 0; index < nbits; index++) { + //if(xbitset[index]) + // std::cout << varNames[index] << " "; if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F'); } //Loading Dataset - DataLoaderCopy(seeddl,fDataLoader.get()); + + std::vector< std::shared_ptr > files = DataLoaderCopyMP(seeddl,fDataLoader.get()); + + + seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); //Booking Seed fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); @@ -349,51 +381,101 @@ void TMVA::VariableImportance::EvaluateImportanceRandom(UInt_t seeds) SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle); delete seeddl; + fClassifier->DeleteAllMethods(); fClassifier->fMethodsMap.clear(); + DataLoaderCopyMPCloseFiles(files); + + return make_pair(SROC, workerID); + }; + + vector< pair > SROC_results; + ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); + + // Fill the pool with work + if(TMVA::gConfig().NWorkers() > 1) { + SROC_results = workers.Map(workItem, ROOT::TSeqI(std::min(range - 1, ULong_t(seeds)))); + } + else { + + for(UInt_t i = 0; i < std::min(range - 1, ULong_t(seeds)); ++ i) { + auto res = workItem(i); + SROC_results.push_back(res); + } + + } + + + for(auto res: SROC_results) { + auto xx = res.second; + auto SROC_ = res.first; + auto workItemsub = [&](UInt_t workerIDsub) { + uint32_t i = workerIDsub; + if (xx & (1 << i)) { + y = xx & ~(1 << i); + + std::bitset ybitset(y); + //need at least one variable + //NOTE: if subssed is zero then is the special case + //that count in xbitset is 1 + Double_t ny = log(xx - y) / 0.693147; + if (y == 0) { + return make_pair(ny, .5); + } + + //creating loader for subseed + TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string()); + + //adding variables from subseed + for (UInt_t index = 0; index < nbits; index++) { + if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F'); + } + + //Loading Dataset + std::vector< std::shared_ptr > files = DataLoaderCopyMP(subseeddl,fDataLoader.get()); + subseeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); + //Booking SubSeed + fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions); + + //Train/Test/Evaluation + fClassifier->TrainAllMethods(); + fClassifier->TestAllMethods(); + fClassifier->EvaluateAllMethods(); + + //getting ROC + SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle); + //importance += SROC_ - SSROC; + + delete subseeddl; + + fClassifier->DeleteAllMethods(); + fClassifier->fMethodsMap.clear(); + DataLoaderCopyMPCloseFiles(files); + + return make_pair((double)ny, (double)SSROC); + } + else return make_pair(-1.,(double)0.); + }; + vector< pair > results; + if(TMVA::gConfig().NWorkers() > 1) { + ROOT::TProcessExecutor workers_sub(TMVA::gConfig().NWorkers()); + + // Fill the pool with work + results = workers_sub.Map(workItemsub, ROOT::TSeqI(32)); + } + else { + for(int i = 0; i < 32; ++ i) { + auto res_sub = workItemsub(i); + results.push_back(res_sub); + } + + } + for(auto res_pair: results) { + importances[res_pair.first] += SROC_-res_pair.second; + } + } - for (uint32_t i = 0; i < 32; ++i) { - if (x & (1 << i)) { - y = x & ~(1 << i); - std::bitset ybitset(y); - //need at least one variable - //NOTE: if subssed is zero then is the special case - //that count in xbitset is 1 - Double_t ny = log(x - y) / 0.693147; - if (y == 0) { - importances[ny] = SROC - 0.5; - importances_norm += importances[ny]; - continue; - } - - //creating loader for subseed - TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string()); - //adding variables from subseed - for (UInt_t index = 0; index < nbits; index++) { - if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F'); - } - - //Loading Dataset - DataLoaderCopy(subseeddl,fDataLoader.get()); - - //Booking SubSeed - fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions); - - //Train/Test/Evaluation - fClassifier->TrainAllMethods(); - fClassifier->TestAllMethods(); - fClassifier->EvaluateAllMethods(); - //getting ROC - SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle); - importances[ny] += SROC - SSROC; - - delete subseeddl; - fClassifier->DeleteAllMethods(); - fClassifier->fMethodsMap.clear(); - } - } - } Float_t normalization = 0.0; for (UInt_t i = 0; i < nbits; i++) normalization += importances[i]; @@ -431,48 +513,67 @@ void TMVA::VariableImportance::EvaluateImportanceAll() std::vector importances(nbits); //vector to save ROC-Integral values - std::vector ROC(range); - ROC[0]=0.5; + //std::vector ROC(range); + //ROC[0]=0.5; for (UInt_t i = 0; i < nbits; i++) importances[i] = 0; Float_t SROC, SSROC; //computed ROC value - for ( x = 1; x xbitset(x); - if (x == 0) continue; //dataloader need at least one variable - - //creating loader for seed - TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string()); - - //adding variables from seed - for (UInt_t index = 0; index < nbits; index++) { - if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F'); - } - - DataLoaderCopy(seeddl,fDataLoader.get()); - - seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); - - //Booking Seed - fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); - - //Train/Test/Evaluation - fClassifier->TrainAllMethods(); - fClassifier->TestAllMethods(); - fClassifier->EvaluateAllMethods(); - - //getting ROC - ROC[x] = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle); - - delete seeddl; - fClassifier->DeleteAllMethods(); - fClassifier->fMethodsMap.clear(); - } - for ( x = 0; x xbitset(workerID); + + if(workerID == 0) return ROC; + //creating loader for seed + TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string()); + + //adding variables from seed + for (UInt_t index = 0; index < nbits; index++) { + if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F'); + } + + std::vector< std::shared_ptr > files = DataLoaderCopyMP(seeddl,fDataLoader.get()); + seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); + + TMVA::gConfig().SetSilent(kFALSE); + auto classifier =std::unique_ptr(new TMVA::Factory("VariableImportanceworker","!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification")); + classifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); + //Train/Test/Evaluation + //Booking Seed + + classifier->TrainAllMethods(); + classifier->TestAllMethods(); + classifier->EvaluateAllMethods(); + + //getting ROC + ROC = classifier->GetROCIntegral(xbitset.to_string(), methodTitle); + + delete seeddl; + classifier->DeleteAllMethods(); + classifier->fMethodsMap.clear(); + DataLoaderCopyMPCloseFiles(files); + return ROC; + }; + + vectorROC_result; + if(TMVA::gConfig().NWorkers() > 1) { + ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); + + // Fill the pool with work + ROC_result = workers.Map(workItem, ROOT::TSeqI(range)); + } + else { + for(UInt_t i = 0; i < range; ++ i) { + auto res = workItem(i); + ROC_result.push_back(res); + } + } + for ( x = 0; x Date: Wed, 16 Aug 2017 15:49:32 +0200 Subject: [PATCH 2/4] Fixed clang-format issues --- tmva/tmva/inc/TMVA/Config.h | 20 +- tmva/tmva/inc/TMVA/DataLoader.h | 36 +- tmva/tmva/src/Config.cxx | 10 +- tmva/tmva/src/CrossValidation.cxx | 44 +- tmva/tmva/src/DataLoader.cxx | 41 +- tmva/tmva/src/HyperParameterOptimisation.cxx | 41 +- tmva/tmva/src/VariableImportance.cxx | 485 +++++++++---------- 7 files changed, 330 insertions(+), 347 deletions(-) diff --git a/tmva/tmva/inc/TMVA/Config.h b/tmva/tmva/inc/TMVA/Config.h index 115262caa1c6e..f477f56cf14bb 100644 --- a/tmva/tmva/inc/TMVA/Config.h +++ b/tmva/tmva/inc/TMVA/Config.h @@ -1,4 +1,4 @@ -// @(#)root/tmva $Id$ +// @(#)root/tmva $Id$ // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss /********************************************************************************** @@ -47,7 +47,7 @@ namespace TMVA { class MsgLogger; class Config { - + public: static Config& Instance(); @@ -64,9 +64,9 @@ namespace TMVA { Bool_t DrawProgressBar() const { return fDrawProgressBar; } void SetDrawProgressBar( Bool_t d ) { fDrawProgressBar = d; } - - UInt_t NWorkers() const {return fNWorkers; } - void SetNWorkers (UInt_t n) {fNWorkers = n; } + + UInt_t NWorkers() const { return fNWorkers; } + void SetNWorkers ( UInt_t n ) { fNWorkers = n; } public: @@ -100,8 +100,8 @@ namespace TMVA { TString fWeightFileExtension; TString fOptionsReferenceFileDir; } fIONames; // Customisable weight file properties - - + + private: // private constructor @@ -113,7 +113,7 @@ namespace TMVA { static std::atomic fgConfigPtr; #else static Config* fgConfigPtr; -#endif +#endif private: #if __cplusplus > 199711L @@ -121,7 +121,7 @@ namespace TMVA { std::atomic fSilent; // no output at all std::atomic fWriteOptionsReference; // if set true: Configurable objects write file with option reference std::atomic fDrawProgressBar; // draw progress bar to indicate training evolution - std::atomic fNWorkers; // number of workers in multiprocessing parallelization + std::atomic fNWorkers; // number of workers in multiprocessing parallelization #else Bool_t fUseColoredConsole; // coloured standard output Bool_t fSilent; // no output at all @@ -131,7 +131,7 @@ namespace TMVA { #endif mutable MsgLogger* fLogger; // message logger MsgLogger& Log() const { return *fLogger; } - + ClassDef(Config,0); // Singleton class for global configuration settings }; diff --git a/tmva/tmva/inc/TMVA/DataLoader.h b/tmva/tmva/inc/TMVA/DataLoader.h index 7d5be1e467812..8f0206dc1872b 100644 --- a/tmva/tmva/inc/TMVA/DataLoader.h +++ b/tmva/tmva/inc/TMVA/DataLoader.h @@ -40,7 +40,6 @@ #include "TMVA/DataSet.h" #include "TFile.h" -//class TFile; class TTree; class TDirectory; class TH2; @@ -86,16 +85,16 @@ namespace TMVA { // special case: signal/background // Data input related - void SetInputTrees( const TString& signalFileName, const TString& backgroundFileName, + void SetInputTrees( const TString& signalFileName, const TString& backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0 ); void SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut ); // Set input trees at once - void SetInputTrees( TTree* signal, TTree* background, + void SetInputTrees( TTree* signal, TTree* background, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0) ; void AddSignalTree( TTree* signal, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType ); void AddSignalTree( TString datFileS, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType ); - void AddSignalTree( TTree* signal, Double_t weight, const TString& treetype ); + void AddSignalTree( TTree* signal, Double_t weight, const TString& treetype ); // ... depreciated, kept for backwards compatibility void SetSignalTree( TTree* signal, Double_t weight=1.0); @@ -111,9 +110,9 @@ namespace TMVA { void SetBackgroundWeightExpression( const TString& variable ); // special case: regression - void AddRegressionTree( TTree* tree, Double_t weight = 1.0, - Types::ETreeType treetype = Types::kMaxTreeType ) { - AddTree( tree, "Regression", weight, "", treetype ); + void AddRegressionTree( TTree* tree, Double_t weight = 1.0, + Types::ETreeType treetype = Types::kMaxTreeType ) { + AddTree( tree, "Regression", weight, "", treetype ); } // general @@ -155,10 +154,10 @@ namespace TMVA { void PrepareTrainingAndTestTree( const TCut& cut, const TString& splitOpt ); void PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt ); - // ... deprecated, kept for backwards compatibility + // ... deprecated, kept for backwards compatibility void PrepareTrainingAndTestTree( const TCut& cut, Int_t Ntrain, Int_t Ntest = -1 ); - void PrepareTrainingAndTestTree( const TCut& cut, Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest, + void PrepareTrainingAndTestTree( const TCut& cut, Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest, const TString& otherOpt="SplitMode=Random:!V" ); void PrepareTrainingAndTestTree( int foldNumber, Types::ETreeType tt ); @@ -170,15 +169,15 @@ namespace TMVA { const DataSetInfo& GetDefaultDataSetInfo(){ return DefaultDataSetInfo(); } TH2* GetCorrelationMatrix(const TString& className); - + //Copy method use in VI and CV DEPRECATED: you can just call Clone DataLoader *dl2=(DataLoader *)dl1->Clone("dl2") DataLoader* MakeCopy(TString name); - friend void DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src); + friend void DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src); DataInputHandler& DataInput() { return *fDataInputHandler; } - + private: - + DataSetInfo& DefaultDataSetInfo(); void SetInputTreesFromEventAssignTrees(); @@ -190,7 +189,7 @@ namespace TMVA { DataSetManager* fDataSetManager; // DSMTEST - + DataInputHandler* fDataInputHandler;//-> std::vector fDefaultTrfs; // list of transformations on default DataSet @@ -201,7 +200,7 @@ namespace TMVA { Bool_t fVerbose; // verbose mode // flag determining the way training and test data are assigned to DataLoader - enum DataAssignType { kUndefined = 0, + enum DataAssignType { kUndefined = 0, kAssignTrees, kAssignEvents }; DataAssignType fDataAssignType; // flags for data assigning @@ -218,7 +217,7 @@ namespace TMVA { Int_t fATreeType = 0; // type of event (=classIndex) Float_t fATreeWeight = 0.0; // weight of the event std::vector fATreeEvent; // event variables - + Types::EAnalysisType fAnalysisType; // the training type Bool_t fMakeFoldDataSet; // flag telling if the DataSet folds have been done @@ -228,9 +227,8 @@ namespace TMVA { ClassDef(DataLoader,3); }; void DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src); - std::vector < std::shared_ptr > DataLoaderCopyMP(TMVA::DataLoader *des, TMVA::DataLoader *src); - void DataLoaderCopyMPCloseFiles(std::vector< std::shared_ptr > files); + std::vector> DataLoaderCopyMP(TMVA::DataLoader *des, TMVA::DataLoader *src); + void DataLoaderCopyMPCloseFiles(std::vector> files); } // namespace TMVA #endif - diff --git a/tmva/tmva/src/Config.cxx b/tmva/tmva/src/Config.cxx index c3ce9110695c4..5d7b5ec613d13 100644 --- a/tmva/tmva/src/Config.cxx +++ b/tmva/tmva/src/Config.cxx @@ -53,12 +53,12 @@ TMVA::Config& TMVA::gConfig() { return TMVA::Config::Instance(); } /// constructor - set defaults TMVA::Config::Config() : - fUseColoredConsole ( kTRUE ), - fSilent ( kFALSE ), + fUseColoredConsole(kTRUE), + fSilent(kFALSE), fWriteOptionsReference( kFALSE ), - fDrawProgressBar ( kFALSE ), - fNWorkers ( 1 ), - fLogger ( new MsgLogger("Config") ) + fDrawProgressBar(kFALSE), + fNWorkers(1), + fLogger(new MsgLogger("Config")) { // plotting fVariablePlotting.fTimesRMS = 8.0; diff --git a/tmva/tmva/src/CrossValidation.cxx b/tmva/tmva/src/CrossValidation.cxx index ce9b5a9705e83..27d41e82ecd01 100644 --- a/tmva/tmva/src/CrossValidation.cxx +++ b/tmva/tmva/src/CrossValidation.cxx @@ -13,7 +13,6 @@ #include "TMVA/Types.h" #include "ROOT/TProcessExecutor.hxx" - #include "TSystem.h" #include "TAxis.h" #include "TCanvas.h" @@ -24,9 +23,6 @@ #include using namespace std; -//const UInt_t nWorkers = 2U; - - /*! \class TMVA::CrossValidationResult \ingroup TMVA */ @@ -132,17 +128,17 @@ void TMVA::CrossValidation::Evaluate() fFoldStatus=kTRUE; } - auto workItem = [&](UInt_t workerID) { Log() << kDEBUG << "Fold (" << methodTitle << "): " << workerID << Endl; // Get specific fold of dataset and setup method TString foldTitle = methodTitle; foldTitle += "_fold"; - foldTitle += workerID+1; - auto classifier = std::unique_ptr(new TMVA::Factory("CrossValidation","!V:!ROC:Silent:!ModelPersistence:!Color:!DrawProgressBar:AnalysisType=Classification")); + foldTitle += workerID + 1; + auto classifier = std::unique_ptr(new TMVA::Factory( + "CrossValidation","!V:!ROC:Silent:!ModelPersistence:!Color:!DrawProgressBar:AnalysisType=Classification")); fDataLoader->PrepareFoldDataSet(workerID, TMVA::Types::kTesting); - MethodBase* smethod = classifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); + MethodBase *smethod = classifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); // Train method Event::SetIsTraining(kTRUE); @@ -154,11 +150,10 @@ void TMVA::CrossValidation::Evaluate() smethod->TestClassification(); // Store results - auto res = classifier->GetROCIntegral(fDataLoader->GetName(),methodTitle); - //fResults.fROCs[workerID] = classifier->GetROCIntegral(fDataLoader->GetName(),methodTitle); + auto res = classifier->GetROCIntegral(fDataLoader->GetName(), methodTitle); TGraph* gr = classifier->GetROCCurve(fDataLoader->GetName(), methodTitle, true); - gr->SetLineColor(workerID+1); + gr->SetLineColor(workerID + 1); gr->SetLineWidth(2); gr->SetTitle(foldTitle.Data()); fResults.fROCCurves->Add(gr); @@ -167,10 +162,10 @@ void TMVA::CrossValidation::Evaluate() fResults.fSeps.push_back(smethod->GetSeparation()); Double_t err; - fResults.fEff01s.push_back(smethod->GetEfficiency("Efficiency:0.01",Types::kTesting,err)); - fResults.fEff10s.push_back(smethod->GetEfficiency("Efficiency:0.10",Types::kTesting,err)); - fResults.fEff30s.push_back(smethod->GetEfficiency("Efficiency:0.30",Types::kTesting,err)); - fResults.fEffAreas.push_back(smethod->GetEfficiency("" ,Types::kTesting,err)); + fResults.fEff01s.push_back(smethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err)); + fResults.fEff10s.push_back(smethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err)); + fResults.fEff30s.push_back(smethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err)); + fResults.fEffAreas.push_back(smethod->GetEfficiency("" , Types::kTesting, err)); fResults.fTrainEff01s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.01")); fResults.fTrainEff10s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.10")); fResults.fTrainEff30s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.30")); @@ -183,32 +178,29 @@ void TMVA::CrossValidation::Evaluate() return make_pair(res, workerID); }; - - vector< pair < double, UInt_t > > res; + vector> res; auto nWorkers = TMVA::gConfig().NWorkers(); if(nWorkers > 1) { - ROOT::TProcessExecutor workers(nWorkers); - res = workers.Map(workItem, ROOT::TSeqI(fNumFolds)); + ROOT::TProcessExecutor workers(nWorkers); + res = workers.Map(workItem, ROOT::TSeqI(fNumFolds)); } else { - for(UInt_t i = 0; i < fNumFolds; ++ i) { auto res_pair = workItem(i); res.push_back(res_pair); } - } for(auto res_pair: res) { - fResults.fROCs[res_pair.second] = res_pair.first; - } + fResults.fROCs[res_pair.second] = res_pair.first; + } - TMVA::gConfig().SetSilent(kFALSE); - Log() << kINFO << "Evaluation done." << Endl; - TMVA::gConfig().SetSilent(kTRUE); + TMVA::gConfig().SetSilent(kFALSE); + Log() << kINFO << "Evaluation done." << Endl; + TMVA::gConfig().SetSilent(kTRUE); } const TMVA::CrossValidationResult& TMVA::CrossValidation::GetResults() const { diff --git a/tmva/tmva/src/DataLoader.cxx b/tmva/tmva/src/DataLoader.cxx index 1754d6ceb6694..1005a09c5b017 100644 --- a/tmva/tmva/src/DataLoader.cxx +++ b/tmva/tmva/src/DataLoader.cxx @@ -715,7 +715,6 @@ void TMVA::DataLoader::MakeKFoldDataSet(UInt_t numberFolds, bool validationSet){ void TMVA::DataLoader::PrepareFoldDataSet(UInt_t foldNumber, Types::ETreeType tt){ - UInt_t numFolds = fTrainSigEvents.size(); std::vector* tempTrain = new std::vector; @@ -857,45 +856,47 @@ void TMVA::DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src) for( std::vector::const_iterator treeinfo=src->DataInput().Sbegin();treeinfo!=src->DataInput().Send();treeinfo++) { - des->AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType()); + des->AddSignalTree((*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType()); } for( std::vector::const_iterator treeinfo=src->DataInput().Bbegin();treeinfo!=src->DataInput().Bend();treeinfo++) { - des->AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType()); + des->AddBackgroundTree((*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType()); } } -std::vector< std::shared_ptr > TMVA::DataLoaderCopyMP(TMVA::DataLoader *des, TMVA::DataLoader *src) { +std::vector> TMVA::DataLoaderCopyMP(TMVA::DataLoader *des, TMVA::DataLoader *src) +{ - std::vector< std::shared_ptr > vec_files; - for(std::vector::const_iterator treeinfo_signal=src->DataInput().Sbegin(), treeinfo_back=src->DataInput().Bbegin(); + std::vector> vec_files; + for(std::vector::const_iterator treeinfo_signal=src->DataInput().Sbegin(), + treeinfo_back=src->DataInput().Bbegin(); treeinfo_signal!=src->DataInput().Send(), treeinfo_back!=src->DataInput().Bend(); - treeinfo_signal++, treeinfo_back++) - { + treeinfo_signal++, treeinfo_back++) { TTree *stree = treeinfo_signal -> GetTree(); TTree *btree = treeinfo_back -> GetTree(); TString sfileName = stree->GetCurrentFile()->GetName(); - TString bfileName = btree->GetCurrentFile()->GetName(); - std::shared_ptr sfile( TFile::Open(sfileName)) ; - std::shared_ptr bfile; - if (bfileName != sfileName) { - bfile = std::shared_ptr(TFile::Open(bfileName)); + TString bfileName = btree->GetCurrentFile()->GetName(); + std::shared_ptr sfile( TFile::Open(sfileName)) ; + std::shared_ptr bfile; + if (bfileName != sfileName) { + bfile = std::shared_ptr(TFile::Open(bfileName)); } - else { + else { bfile = sfile; } - TTree* signalTree = (TTree*)sfile->Get(stree->GetName()); + TTree* signalTree = (TTree*)sfile->Get(stree->GetName()); TTree* backgTree = (TTree*)bfile->Get(btree->GetName()); - des->AddSignalTree(signalTree); - des->AddBackgroundTree(backgTree); + des->AddSignalTree(signalTree); + des->AddBackgroundTree(backgTree); - vec_files.push_back(sfile); - vec_files.push_back(bfile); + vec_files.push_back(sfile); + vec_files.push_back(bfile); } return vec_files; } -void TMVA::DataLoaderCopyMPCloseFiles(std::vector > files) { +void TMVA::DataLoaderCopyMPCloseFiles(std::vector> files) +{ for(auto file: files) { file->Close(); } diff --git a/tmva/tmva/src/HyperParameterOptimisation.cxx b/tmva/tmva/src/HyperParameterOptimisation.cxx index 9ad078abe56e5..6c6171dc4a23a 100644 --- a/tmva/tmva/src/HyperParameterOptimisation.cxx +++ b/tmva/tmva/src/HyperParameterOptimisation.cxx @@ -31,8 +31,6 @@ using namespace std; */ -//const int nWorkers = 4U; - TMVA::HyperParameterOptimisationResult::HyperParameterOptimisationResult() : fROCAVG(0.0), fROCCurves(std::make_shared()) { @@ -103,40 +101,39 @@ void TMVA::HyperParameterOptimisation::Evaluate() } fResults.fMethodName = methodName; auto workItem = [&](UInt_t workerID) { - TString foldTitle = methodTitle; - foldTitle += "_opt"; - foldTitle += workerID+1; - Event::SetIsTraining(kTRUE); - fDataLoader->PrepareFoldDataSet(workerID, TMVA::Types::kTraining); + TString foldTitle = methodTitle; + foldTitle += "_opt"; + foldTitle += workerID+1; + + Event::SetIsTraining(kTRUE); + fDataLoader->PrepareFoldDataSet(workerID, TMVA::Types::kTraining); - auto smethod = fClassifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); + auto smethod = fClassifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); - auto params=smethod->OptimizeTuningParameters(fFomType,fFitType); + auto params=smethod->OptimizeTuningParameters(fFomType,fFitType); - //fResults.fFoldParameters.push_back(params); + smethod->Data()->DeleteResults(smethod->GetMethodName(), Types::kTraining, Types::kClassification); - smethod->Data()->DeleteResults(smethod->GetMethodName(), Types::kTraining, Types::kClassification); + fClassifier->DeleteAllMethods(); - fClassifier->DeleteAllMethods(); + fClassifier->fMethodsMap.clear(); - fClassifier->fMethodsMap.clear(); + return params; - return params; }; - vector < map > res; + vector> res; auto nWorkers = TMVA::gConfig().NWorkers(); - if(nWorkers> 1) { - ROOT::TProcessExecutor workers(nWorkers); - res = workers.Map(workItem, ROOT::TSeqI(fNumFolds)); - } - else { + if(nWorkers > 1) { + ROOT::TProcessExecutor workers(nWorkers); + res = workers.Map(workItem, ROOT::TSeqI(fNumFolds)); + } else { for(UInt_t i = 0; i < fNumFolds; ++ i) { res.push_back(workItem(i)); } } - for(auto results: res) { - fResults.fFoldParameters.push_back(results); + for(auto results : res) { + fResults.fFoldParameters.push_back(results); } } diff --git a/tmva/tmva/src/VariableImportance.cxx b/tmva/tmva/src/VariableImportance.cxx index 52e81dc45df09..0d08664cff686 100644 --- a/tmva/tmva/src/VariableImportance.cxx +++ b/tmva/tmva/src/VariableImportance.cxx @@ -221,9 +221,11 @@ void TMVA::VariableImportance::EvaluateImportanceShort() for (UInt_t index = 0; index < nbits; index++){ if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F'); } - //Loading Dataset + DataLoaderCopy(seeddl,fDataLoader.get()); - seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); + seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), + fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), + fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); //Booking Seed fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); @@ -241,71 +243,73 @@ void TMVA::VariableImportance::EvaluateImportanceShort() fClassifier->fMethodsMap.clear(); auto workItem = [&](UInt_t workerID) { - uint32_t i = workerID; - if (x & (1 << i)) { - y = x & ~(1 << i); - std::bitset ybitset(y); - //need at least one variable - //NOTE: if subssed is zero then is the special case - //that count in xbitset is 1 - Double_t ny = log(x - y) / 0.693147; - if (y == 0) { - return make_pair(ny, 0.5); - } - - //creating loader for subseed - TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string()); - - //adding variables from subseed - for (UInt_t index = 0; index < nbits; index++) { - if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F'); - } - - //Loading Dataset - std::vector< std::shared_ptr > files = DataLoaderCopyMP(subseeddl,fDataLoader.get()); - subseeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); - //Booking SubSeed - fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions); - - //Train/Test/Evaluation - fClassifier->TrainAllMethods(); - fClassifier->TestAllMethods(); - fClassifier->EvaluateAllMethods(); - - //getting ROC - SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle); - //importances[ny] += SROC - SSROC; + uint32_t i = workerID; + if (x & (1 << i)) { + y = x & ~(1 << i); + std::bitset ybitset(y); + //need at least one variable + //NOTE: if subssed is zero then is the special case + //that count in xbitset is 1 + Double_t ny = log(x - y) / 0.693147; + if (y == 0) { + return make_pair(ny, 0.5); + } - delete subseeddl; - fClassifier->DeleteAllMethods(); - fClassifier->fMethodsMap.clear(); - DataLoaderCopyMPCloseFiles(files); + //creating loader for subseed + TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string()); - return make_pair((double)ny, (double)SSROC ); + //adding variables from subseed + for (UInt_t index = 0; index < nbits; index++) { + if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F'); } - else return make_pair(-1., (double)0.); + //Loading Dataset + std::vector> files = DataLoaderCopyMP(subseeddl,fDataLoader.get()); + subseeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), + fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), + fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); + //Booking SubSeed + fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions); + + //Train/Test/Evaluation + fClassifier->TrainAllMethods(); + fClassifier->TestAllMethods(); + fClassifier->EvaluateAllMethods(); + + //getting ROC + SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle); + //importances[ny] += SROC - SSROC; + + delete subseeddl; + fClassifier->DeleteAllMethods(); + fClassifier->fMethodsMap.clear(); + DataLoaderCopyMPCloseFiles(files); + + return make_pair((double)ny, (double)SSROC ); + } + else + return make_pair(-1., (double)0.); }; - vector< pair > results; + vector> results; if(TMVA::gConfig().NWorkers() > 1) { ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); results = workers.Map(workItem, ROOT::TSeqI(32)); } else { - for(int i = 0; i < 32; ++ i) { + for (int i = 0; i < 32; ++i) { auto res = workItem(i); results.push_back(res); } } - for(auto res_pair: results) { - if(res_pair.first >= 0) - importances[res_pair.first] += SROC - res_pair.second; + for (auto res_pair : results) { + if (res_pair.first >= 0) + importances[res_pair.first] += SROC - res_pair.second; } Float_t normalization = 0.0; for (UInt_t i = 0; i < nbits; i++) normalization += importances[i]; - for(UInt_t i=0;i importances(nbits); - //Float_t importances_norm = 0; for (UInt_t i = 0; i < nbits; i++)importances[i] = 0; Float_t SROC, SSROC; //computed ROC value for every Seed and SubSeed - //x = range; - //int cnt = 0; std::unordered_mapused; auto workItem = [&](UInt_t workerID) { - while(true) { - workerID = rangen -> Integer(range); - if(!used[workerID] && workerID != 0) break; - } - std::bitset xbitset(workerID); //dataloader need at least one variable - - used[workerID] = 1; - //creating loader for seed - TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string()); - //adding variables from seed - for (UInt_t index = 0; index < nbits; index++) { - //if(xbitset[index]) - // std::cout << varNames[index] << " "; - if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F'); - } - - //Loading Dataset + while(true) { + workerID = rangen -> Integer(range); + if(!used[workerID] && workerID != 0) break; + } + std::bitset xbitset(workerID); //dataloader need at least one variable + + used[workerID] = 1; + // creating loader for seed + TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string()); + // adding variables from seed + for (UInt_t index = 0; index < nbits; index++) { + // if(xbitset[index]) + // std::cout << varNames[index] << " "; + if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F'); + } - std::vector< std::shared_ptr > files = DataLoaderCopyMP(seeddl,fDataLoader.get()); + // Loading Dataset + std::vector> files = DataLoaderCopyMP(seeddl,fDataLoader.get()); - seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); + seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), + fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), + fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); - //Booking Seed - fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); + // Booking Seed + fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); - //Train/Test/Evaluation - fClassifier->TrainAllMethods(); - fClassifier->TestAllMethods(); - fClassifier->EvaluateAllMethods(); + // Train/Test/Evaluation + fClassifier->TrainAllMethods(); + fClassifier->TestAllMethods(); + fClassifier->EvaluateAllMethods(); - //getting ROC - SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle); + //getting ROC + SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle); - delete seeddl; + delete seeddl; - fClassifier->DeleteAllMethods(); - fClassifier->fMethodsMap.clear(); - DataLoaderCopyMPCloseFiles(files); + fClassifier->DeleteAllMethods(); + fClassifier->fMethodsMap.clear(); + DataLoaderCopyMPCloseFiles(files); - return make_pair(SROC, workerID); - }; + return make_pair(SROC, workerID); + }; - vector< pair > SROC_results; - ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); + vector> SROC_results; + ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); // Fill the pool with work - if(TMVA::gConfig().NWorkers() > 1) { - SROC_results = workers.Map(workItem, ROOT::TSeqI(std::min(range - 1, ULong_t(seeds)))); - } - else { - - for(UInt_t i = 0; i < std::min(range - 1, ULong_t(seeds)); ++ i) { - auto res = workItem(i); - SROC_results.push_back(res); - } - - } - - - for(auto res: SROC_results) { - auto xx = res.second; - auto SROC_ = res.first; - auto workItemsub = [&](UInt_t workerIDsub) { - uint32_t i = workerIDsub; - if (xx & (1 << i)) { - y = xx & ~(1 << i); - - std::bitset ybitset(y); - //need at least one variable - //NOTE: if subssed is zero then is the special case - //that count in xbitset is 1 - Double_t ny = log(xx - y) / 0.693147; - if (y == 0) { - return make_pair(ny, .5); - } - - //creating loader for subseed - TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string()); - - //adding variables from subseed - for (UInt_t index = 0; index < nbits; index++) { - if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F'); - } - - //Loading Dataset - std::vector< std::shared_ptr > files = DataLoaderCopyMP(subseeddl,fDataLoader.get()); - subseeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); - //Booking SubSeed - fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions); - - //Train/Test/Evaluation - fClassifier->TrainAllMethods(); - fClassifier->TestAllMethods(); - fClassifier->EvaluateAllMethods(); - - //getting ROC - SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle); - //importance += SROC_ - SSROC; - - delete subseeddl; - - fClassifier->DeleteAllMethods(); - fClassifier->fMethodsMap.clear(); - DataLoaderCopyMPCloseFiles(files); - - return make_pair((double)ny, (double)SSROC); - } - else return make_pair(-1.,(double)0.); - }; - vector< pair > results; - if(TMVA::gConfig().NWorkers() > 1) { - ROOT::TProcessExecutor workers_sub(TMVA::gConfig().NWorkers()); - - // Fill the pool with work - results = workers_sub.Map(workItemsub, ROOT::TSeqI(32)); - } - else { - for(int i = 0; i < 32; ++ i) { - auto res_sub = workItemsub(i); - results.push_back(res_sub); - } - - } - for(auto res_pair: results) { - importances[res_pair.first] += SROC_-res_pair.second; - } - } - + if(TMVA::gConfig().NWorkers() > 1) { + SROC_results = workers.Map(workItem, ROOT::TSeqI(std::min(range - 1, ULong_t(seeds)))); + } else { + for(UInt_t i = 0; i < std::min(range - 1, ULong_t(seeds)); ++ i) { + auto res = workItem(i); + SROC_results.push_back(res); + } + } + + + for(auto res: SROC_results) { + auto xx = res.second; + auto SROC_ = res.first; + auto workItemsub = [&](UInt_t workerIDsub) { + uint32_t i = workerIDsub; + if (xx & (1 << i)) { + y = xx & ~(1 << i); + + std::bitset ybitset(y); + //need at least one variable + //NOTE: if subssed is zero then is the special case + //that count in xbitset is 1 + Double_t ny = log(xx - y) / 0.693147; + if (y == 0) { + return make_pair(ny, .5); + } + + //creating loader for subseed + TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string()); + + //adding variables from subseed + for (UInt_t index = 0; index < nbits; index++) { + if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F'); + } + + //Loading Dataset + std::vector< std::shared_ptr > files = DataLoaderCopyMP(subseeddl,fDataLoader.get()); + subseeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), + fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), + fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); + //Booking SubSeed + fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions); + + //Train/Test/Evaluation + fClassifier->TrainAllMethods(); + fClassifier->TestAllMethods(); + fClassifier->EvaluateAllMethods(); + + //getting ROC + SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle); + //importance += SROC_ - SSROC; + + delete subseeddl; + + fClassifier->DeleteAllMethods(); + fClassifier->fMethodsMap.clear(); + DataLoaderCopyMPCloseFiles(files); + + return make_pair((double)ny, (double)SSROC); + } else + return make_pair(-1.,(double)0.); + }; + vector> results; + if(TMVA::gConfig().NWorkers() > 1) { + ROOT::TProcessExecutor workers_sub(TMVA::gConfig().NWorkers()); + + // Fill the pool with work + results = workers_sub.Map(workItemsub, ROOT::TSeqI(32)); + } else { + for (int i = 0; i < 32; ++ i) { + auto res_sub = workItemsub(i); + results.push_back(res_sub); + } + } + for (auto res_pair : results) { + importances[res_pair.first] += SROC_-res_pair.second; + } + } Float_t normalization = 0.0; for (UInt_t i = 0; i < nbits; i++) normalization += importances[i]; @@ -512,9 +509,6 @@ void TMVA::VariableImportance::EvaluateImportanceAll() //vector to save importances std::vector importances(nbits); - //vector to save ROC-Integral values - //std::vector ROC(range); - //ROC[0]=0.5; for (UInt_t i = 0; i < nbits; i++) importances[i] = 0; Float_t SROC, SSROC; //computed ROC value @@ -522,84 +516,85 @@ void TMVA::VariableImportance::EvaluateImportanceAll() auto workItem = [&](UInt_t workerID) { - Float_t ROC; - ROC = 0.5; - std::bitset xbitset(workerID); - - if(workerID == 0) return ROC; - //creating loader for seed - TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string()); - - //adding variables from seed - for (UInt_t index = 0; index < nbits; index++) { - if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F'); - } - - std::vector< std::shared_ptr > files = DataLoaderCopyMP(seeddl,fDataLoader.get()); - seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); - - TMVA::gConfig().SetSilent(kFALSE); - auto classifier =std::unique_ptr(new TMVA::Factory("VariableImportanceworker","!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification")); - classifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); - //Train/Test/Evaluation - //Booking Seed - - classifier->TrainAllMethods(); - classifier->TestAllMethods(); - classifier->EvaluateAllMethods(); - - //getting ROC - ROC = classifier->GetROCIntegral(xbitset.to_string(), methodTitle); - - delete seeddl; - classifier->DeleteAllMethods(); - classifier->fMethodsMap.clear(); - DataLoaderCopyMPCloseFiles(files); - return ROC; - }; - - vectorROC_result; - if(TMVA::gConfig().NWorkers() > 1) { - ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); - - // Fill the pool with work - ROC_result = workers.Map(workItem, ROOT::TSeqI(range)); - } - else { - for(UInt_t i = 0; i < range; ++ i) { - auto res = workItem(i); - ROC_result.push_back(res); - } - } - for ( x = 0; x xbitset(workerID); + + if(workerID == 0) return ROC; + // creating loader for seed + TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string()); + + // adding variables from seed + for (UInt_t index = 0; index < nbits; index++) { + if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F'); + } + + std::vector> files = DataLoaderCopyMP(seeddl,fDataLoader.get()); + seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), + fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), + fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); + + TMVA::gConfig().SetSilent(kFALSE); + auto classifier =std::unique_ptr( + new TMVA::Factory("VariableImportanceworker", + "!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification")); + classifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); + // Train/Test/Evaluation + // Booking Seed + + classifier->TrainAllMethods(); + classifier->TestAllMethods(); + classifier->EvaluateAllMethods(); + + // getting ROC + ROC = classifier->GetROCIntegral(xbitset.to_string(), methodTitle); + + delete seeddl; + classifier->DeleteAllMethods(); + classifier->fMethodsMap.clear(); + DataLoaderCopyMPCloseFiles(files); + return ROC; + }; + + vectorROC_result; + if(TMVA::gConfig().NWorkers() > 1) { + ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); + + // Fill the pool with work + ROC_result = workers.Map(workItem, ROOT::TSeqI(range)); + } else { + for(UInt_t i = 0; i < range; ++ i) { + auto res = workItem(i); + ROC_result.push_back(res); + } + } + for ( x = 0; x ybitset(y); - - Float_t ny = log(x - y) / 0.693147; - if (y == 0) { - importances[ny] = SROC - 0.5; - continue; - } - - //getting ROC - SSROC = ROC_result[y]; - importances[ny] += SROC - SSROC; - } + if (x & (1 << i)) { + y = x & ~(1 << i); + std::bitset ybitset(y); + + Float_t ny = log(x - y) / 0.693147; + if (y == 0) { + importances[ny] = SROC - 0.5; + continue; + } + // getting ROC + SSROC = ROC_result[y]; + importances[ny] += SROC - SSROC; + } } } Float_t normalization = 0.0; - for (UInt_t i = 0; i < nbits; i++) normalization += importances[i]; + for (UInt_t i = 0; i < nbits; i++) normalization += importances[i]; - for(UInt_t i=0;i(varNames[i])+" % "; - } - fResults.fImportanceHist = std::shared_ptr(GetImportance(nbits,importances,varNames)); + for(UInt_t i=0;i(varNames[i])+" % "; + } + fResults.fImportanceHist = std::shared_ptr(GetImportance(nbits,importances,varNames)); } From 691f6cdb47a14dab5a05a3d5d1301654778245d7 Mon Sep 17 00:00:00 2001 From: mammadhajili Date: Fri, 18 Aug 2017 11:29:42 +0200 Subject: [PATCH 3/4] Loading files fixes and clang format --- tmva/tmva/inc/TMVA/Config.h | 5 +-- tmva/tmva/inc/TMVA/DataLoader.h | 11 +++-- tmva/tmva/src/DataLoader.cxx | 43 +++++++++++++++++--- tmva/tmva/src/HyperParameterOptimisation.cxx | 12 +++++- 4 files changed, 55 insertions(+), 16 deletions(-) diff --git a/tmva/tmva/inc/TMVA/Config.h b/tmva/tmva/inc/TMVA/Config.h index f477f56cf14bb..863a684540a48 100644 --- a/tmva/tmva/inc/TMVA/Config.h +++ b/tmva/tmva/inc/TMVA/Config.h @@ -66,7 +66,7 @@ namespace TMVA { void SetDrawProgressBar( Bool_t d ) { fDrawProgressBar = d; } UInt_t NWorkers() const { return fNWorkers; } - void SetNWorkers ( UInt_t n ) { fNWorkers = n; } + void SetNWorkers (UInt_t n) { fNWorkers = n; } public: @@ -101,7 +101,6 @@ namespace TMVA { TString fOptionsReferenceFileDir; } fIONames; // Customisable weight file properties - private: // private constructor @@ -121,7 +120,7 @@ namespace TMVA { std::atomic fSilent; // no output at all std::atomic fWriteOptionsReference; // if set true: Configurable objects write file with option reference std::atomic fDrawProgressBar; // draw progress bar to indicate training evolution - std::atomic fNWorkers; // number of workers in multiprocessing parallelization + std::atomic fNWorkers; #else Bool_t fUseColoredConsole; // coloured standard output Bool_t fSilent; // no output at all diff --git a/tmva/tmva/inc/TMVA/DataLoader.h b/tmva/tmva/inc/TMVA/DataLoader.h index 8f0206dc1872b..767d834e89cfa 100644 --- a/tmva/tmva/inc/TMVA/DataLoader.h +++ b/tmva/tmva/inc/TMVA/DataLoader.h @@ -85,16 +85,15 @@ namespace TMVA { // special case: signal/background // Data input related - void SetInputTrees( const TString& signalFileName, const TString& backgroundFileName, + void SetInputTrees(const TString& signalFileName, const TString& backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0 ); - void SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut ); + void SetInputTrees( TTree *inputTree, const TCut& SigCut, const TCut& BgCut ); // Set input trees at once - void SetInputTrees( TTree* signal, TTree* background, - Double_t signalWeight=1.0, Double_t backgroundWeight=1.0) ; + void SetInputTrees( TTree *signal, TTree* background, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0) ; - void AddSignalTree( TTree* signal, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType ); + void AddSignalTree( TTree *signal, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType ); void AddSignalTree( TString datFileS, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType ); - void AddSignalTree( TTree* signal, Double_t weight, const TString& treetype ); + void AddSignalTree( TTree *signal, Double_t weight, const TString& treetype ); // ... depreciated, kept for backwards compatibility void SetSignalTree( TTree* signal, Double_t weight=1.0); diff --git a/tmva/tmva/src/DataLoader.cxx b/tmva/tmva/src/DataLoader.cxx index 1005a09c5b017..84ae52ab8bc36 100644 --- a/tmva/tmva/src/DataLoader.cxx +++ b/tmva/tmva/src/DataLoader.cxx @@ -868,9 +868,43 @@ std::vector> TMVA::DataLoaderCopyMP(TMVA::DataLoader *des { std::vector> vec_files; - for(std::vector::const_iterator treeinfo_signal=src->DataInput().Sbegin(), + std::map> map_files; + for(std::vector::const_iterator treeinfo = src->DataInput().Sbegin(); treeinfo != src->DataInput().Send(); ++ treeinfo) { + TTree *stree = treeinfo -> GetTree(); + TString sfileName = stree->GetCurrentFile()->GetName(); + std::shared_ptr sfile; + if(map_files.find(sfileName) == map_files.end()) { + sfile = std::shared_ptr(TFile::Open(sfileName)) ; + map_files[sfileName] = sfile; + vec_files.push_back(sfile); + } + else { + sfile = map_files[sfileName]; + } + + TTree* signalTree = (TTree*)sfile->Get(stree->GetName()); + des->AddSignalTree(signalTree); + } + for(std::vector::const_iterator treeinfo = src->DataInput().Bbegin(); treeinfo != src->DataInput().Bend(); ++ treeinfo) { + TTree *btree = treeinfo -> GetTree(); + TString bfileName = btree->GetCurrentFile()->GetName(); + std::shared_ptr bfile; + if(map_files.find(bfileName) == map_files.end()) { + bfile = std::shared_ptr(TFile::Open(bfileName)) ; + map_files[bfileName] = bfile; + vec_files.push_back(bfile); + } + else { + bfile = map_files[bfileName]; + } + + TTree* backgroundTree = (TTree*)bfile->Get(btree->GetName()); + des->AddBackgroundTree(backgroundTree); + } + + /*for(std::vector::const_iterator treeinfo_signal=src->DataInput().Sbegin(), treeinfo_back=src->DataInput().Bbegin(); - treeinfo_signal!=src->DataInput().Send(), treeinfo_back!=src->DataInput().Bend(); + treeinfo_signal!=src->DataInput().Send() | treeinfo_back!=src->DataInput().Bend(); treeinfo_signal++, treeinfo_back++) { TTree *stree = treeinfo_signal -> GetTree(); TTree *btree = treeinfo_back -> GetTree(); @@ -892,11 +926,10 @@ std::vector> TMVA::DataLoaderCopyMP(TMVA::DataLoader *des vec_files.push_back(sfile); vec_files.push_back(bfile); - } +}*/ return vec_files; } -void TMVA::DataLoaderCopyMPCloseFiles(std::vector> files) -{ +void TMVA::DataLoaderCopyMPCloseFiles(std::vector > files) { for(auto file: files) { file->Close(); } diff --git a/tmva/tmva/src/HyperParameterOptimisation.cxx b/tmva/tmva/src/HyperParameterOptimisation.cxx index 6c6171dc4a23a..348a699e3f4e5 100644 --- a/tmva/tmva/src/HyperParameterOptimisation.cxx +++ b/tmva/tmva/src/HyperParameterOptimisation.cxx @@ -69,7 +69,7 @@ void TMVA::HyperParameterOptimisationResult::Print() const TMVA::HyperParameterOptimisation::HyperParameterOptimisation(TMVA::DataLoader *dataloader):Envelope("HyperParameterOptimisation",dataloader), fFomType("Separation"), fFitType("Minuit"), - fNumFolds(5), + fNumFolds(4), fResults(), fClassifier(new TMVA::Factory("HyperParameterOptimisation","!V:!ROC:Silent:!ModelPersistence:!Color:!DrawProgressBar:AnalysisType=Classification")) { @@ -90,6 +90,7 @@ void TMVA::HyperParameterOptimisation::SetNumFolds(UInt_t i) void TMVA::HyperParameterOptimisation::Evaluate() { + cout << "Number of Workers : " << TMVA::gConfig().NWorkers() << endl; TString methodName = fMethod.GetValue("MethodName"); TString methodTitle = fMethod.GetValue("MethodTitle"); TString methodOptions = fMethod.GetValue("MethodOptions"); @@ -102,7 +103,8 @@ void TMVA::HyperParameterOptimisation::Evaluate() fResults.fMethodName = methodName; auto workItem = [&](UInt_t workerID) { - TString foldTitle = methodTitle; + TString foldTitle = methodTitle; + foldTitle += "_opt"; foldTitle += workerID+1; @@ -124,16 +126,22 @@ void TMVA::HyperParameterOptimisation::Evaluate() }; vector> res; auto nWorkers = TMVA::gConfig().NWorkers(); + cout << "Number of Workers : " << TMVA::gConfig().NWorkers() << endl; if(nWorkers > 1) { + cout << "I am here" << endl; ROOT::TProcessExecutor workers(nWorkers); + cout << "Number of Workers : " << TMVA::gConfig().NWorkers() << endl; res = workers.Map(workItem, ROOT::TSeqI(fNumFolds)); + cout << "Number of Workers : " << TMVA::gConfig().NWorkers() << endl; } else { for(UInt_t i = 0; i < fNumFolds; ++ i) { res.push_back(workItem(i)); } } + cout << "Number of Workers : " << TMVA::gConfig().NWorkers() << endl; for(auto results : res) { fResults.fFoldParameters.push_back(results); } + cout << "Number of Workers : " << TMVA::gConfig().NWorkers() << endl; } From d34f6390324c5fb36470f008b97fadc15e150cc6 Mon Sep 17 00:00:00 2001 From: mammadhajili Date: Wed, 23 Aug 2017 17:50:20 +0200 Subject: [PATCH 4/4] Fixing clang-format issues --- tmva/tmva/inc/LinkDef4.h | 2 +- tmva/tmva/inc/TMVA/Config.h | 4 +- tmva/tmva/inc/TMVA/DataLoader.h | 34 +- tmva/tmva/src/Config.cxx | 10 +- tmva/tmva/src/CrossValidation.cxx | 40 +- tmva/tmva/src/DataLoader.cxx | 73 +- tmva/tmva/src/HyperParameterOptimisation.cxx | 87 ++- tmva/tmva/src/VariableImportance.cxx | 693 +++++++++---------- 8 files changed, 444 insertions(+), 499 deletions(-) diff --git a/tmva/tmva/inc/LinkDef4.h b/tmva/tmva/inc/LinkDef4.h index d8ccf917463f0..6a26f5e5aaa32 100644 --- a/tmva/tmva/inc/LinkDef4.h +++ b/tmva/tmva/inc/LinkDef4.h @@ -43,7 +43,7 @@ #pragma link C++ function TMVA::CreateVariableTransform; #pragma link C++ function TMVA::DataLoaderCopy; -#pragma link C++ function TMVA::DataLoaderCopy; +#pragma link C++ function TMVA::DataLoaderCopyMP; #pragma link C++ class std::map+; diff --git a/tmva/tmva/inc/TMVA/Config.h b/tmva/tmva/inc/TMVA/Config.h index 863a684540a48..2f924be9063e7 100644 --- a/tmva/tmva/inc/TMVA/Config.h +++ b/tmva/tmva/inc/TMVA/Config.h @@ -66,7 +66,7 @@ namespace TMVA { void SetDrawProgressBar( Bool_t d ) { fDrawProgressBar = d; } UInt_t NWorkers() const { return fNWorkers; } - void SetNWorkers (UInt_t n) { fNWorkers = n; } + void SetNWorkers(UInt_t n) { fNWorkers = n; } public: @@ -120,7 +120,7 @@ namespace TMVA { std::atomic fSilent; // no output at all std::atomic fWriteOptionsReference; // if set true: Configurable objects write file with option reference std::atomic fDrawProgressBar; // draw progress bar to indicate training evolution - std::atomic fNWorkers; + std::atomic fNWorkers; #else Bool_t fUseColoredConsole; // coloured standard output Bool_t fSilent; // no output at all diff --git a/tmva/tmva/inc/TMVA/DataLoader.h b/tmva/tmva/inc/TMVA/DataLoader.h index 767d834e89cfa..04f4535924797 100644 --- a/tmva/tmva/inc/TMVA/DataLoader.h +++ b/tmva/tmva/inc/TMVA/DataLoader.h @@ -85,16 +85,16 @@ namespace TMVA { // special case: signal/background // Data input related - void SetInputTrees(const TString& signalFileName, const TString& backgroundFileName, - Double_t signalWeight=1.0, Double_t backgroundWeight=1.0 ); - void SetInputTrees( TTree *inputTree, const TCut& SigCut, const TCut& BgCut ); + void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight = 1.0, + Double_t backgroundWeight = 1.0); + void SetInputTrees(TTree *inputTree, const TCut &SigCut, const TCut &BgCut); // Set input trees at once - void SetInputTrees( TTree *signal, TTree* background, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0) ; - - void AddSignalTree( TTree *signal, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType ); - void AddSignalTree( TString datFileS, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType ); - void AddSignalTree( TTree *signal, Double_t weight, const TString& treetype ); + void + SetInputTrees(TTree *signal, TTree *background, Double_t signalWeight = 1.0, Double_t backgroundWeight = 1.0); + void AddSignalTree(TTree *signal, Double_t weight = 1.0, Types::ETreeType treetype = Types::kMaxTreeType); + void AddSignalTree(TString datFileS, Double_t weight = 1.0, Types::ETreeType treetype = Types::kMaxTreeType); + void AddSignalTree(TTree *signal, Double_t weight, const TString &treetype); // ... depreciated, kept for backwards compatibility void SetSignalTree( TTree* signal, Double_t weight=1.0); @@ -109,9 +109,9 @@ namespace TMVA { void SetBackgroundWeightExpression( const TString& variable ); // special case: regression - void AddRegressionTree( TTree* tree, Double_t weight = 1.0, - Types::ETreeType treetype = Types::kMaxTreeType ) { - AddTree( tree, "Regression", weight, "", treetype ); + void AddRegressionTree(TTree *tree, Double_t weight = 1.0, Types::ETreeType treetype = Types::kMaxTreeType) + { + AddTree(tree, "Regression", weight, "", treetype); } // general @@ -156,8 +156,8 @@ namespace TMVA { // ... deprecated, kept for backwards compatibility void PrepareTrainingAndTestTree( const TCut& cut, Int_t Ntrain, Int_t Ntest = -1 ); - void PrepareTrainingAndTestTree( const TCut& cut, Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest, - const TString& otherOpt="SplitMode=Random:!V" ); + void PrepareTrainingAndTestTree(const TCut &cut, Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest, + const TString &otherOpt = "SplitMode=Random:!V"); void PrepareTrainingAndTestTree( int foldNumber, Types::ETreeType tt ); @@ -171,12 +171,11 @@ namespace TMVA { //Copy method use in VI and CV DEPRECATED: you can just call Clone DataLoader *dl2=(DataLoader *)dl1->Clone("dl2") DataLoader* MakeCopy(TString name); - friend void DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src); + friend void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src); DataInputHandler& DataInput() { return *fDataInputHandler; } private: - DataSetInfo& DefaultDataSetInfo(); void SetInputTreesFromEventAssignTrees(); @@ -188,7 +187,6 @@ namespace TMVA { DataSetManager* fDataSetManager; // DSMTEST - DataInputHandler* fDataInputHandler;//-> std::vector fDefaultTrfs; // list of transformations on default DataSet @@ -199,9 +197,7 @@ namespace TMVA { Bool_t fVerbose; // verbose mode // flag determining the way training and test data are assigned to DataLoader - enum DataAssignType { kUndefined = 0, - kAssignTrees, - kAssignEvents }; + enum DataAssignType { kUndefined = 0, kAssignTrees, kAssignEvents }; DataAssignType fDataAssignType; // flags for data assigning std::vector fTrainAssignTree; // for each class: tmp tree if user wants to assign the events directly std::vector fTestAssignTree; // for each class: tmp tree if user wants to assign the events directly diff --git a/tmva/tmva/src/Config.cxx b/tmva/tmva/src/Config.cxx index 5d7b5ec613d13..21f4bb796dd70 100644 --- a/tmva/tmva/src/Config.cxx +++ b/tmva/tmva/src/Config.cxx @@ -52,13 +52,9 @@ TMVA::Config& TMVA::gConfig() { return TMVA::Config::Instance(); } //////////////////////////////////////////////////////////////////////////////// /// constructor - set defaults -TMVA::Config::Config() : - fUseColoredConsole(kTRUE), - fSilent(kFALSE), - fWriteOptionsReference( kFALSE ), - fDrawProgressBar(kFALSE), - fNWorkers(1), - fLogger(new MsgLogger("Config")) +TMVA::Config::Config() + : fUseColoredConsole(kTRUE), fSilent(kFALSE), fWriteOptionsReference(kFALSE), fDrawProgressBar(kFALSE), fNWorkers(1), + fLogger(new MsgLogger("Config")) { // plotting fVariablePlotting.fTimesRMS = 8.0; diff --git a/tmva/tmva/src/CrossValidation.cxx b/tmva/tmva/src/CrossValidation.cxx index 27d41e82ecd01..91d53508950e0 100644 --- a/tmva/tmva/src/CrossValidation.cxx +++ b/tmva/tmva/src/CrossValidation.cxx @@ -135,8 +135,8 @@ void TMVA::CrossValidation::Evaluate() TString foldTitle = methodTitle; foldTitle += "_fold"; foldTitle += workerID + 1; - auto classifier = std::unique_ptr(new TMVA::Factory( - "CrossValidation","!V:!ROC:Silent:!ModelPersistence:!Color:!DrawProgressBar:AnalysisType=Classification")); + auto classifier = std::unique_ptr(new TMVA::Factory( + "CrossValidation", "!V:!ROC:Silent:!ModelPersistence:!Color:!DrawProgressBar:AnalysisType=Classification")); fDataLoader->PrepareFoldDataSet(workerID, TMVA::Types::kTesting); MethodBase *smethod = classifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); @@ -152,7 +152,7 @@ void TMVA::CrossValidation::Evaluate() // Store results auto res = classifier->GetROCIntegral(fDataLoader->GetName(), methodTitle); - TGraph* gr = classifier->GetROCCurve(fDataLoader->GetName(), methodTitle, true); + TGraph *gr = classifier->GetROCCurve(fDataLoader->GetName(), methodTitle, true); gr->SetLineColor(workerID + 1); gr->SetLineWidth(2); gr->SetTitle(foldTitle.Data()); @@ -165,7 +165,7 @@ void TMVA::CrossValidation::Evaluate() fResults.fEff01s.push_back(smethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err)); fResults.fEff10s.push_back(smethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err)); fResults.fEff30s.push_back(smethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err)); - fResults.fEffAreas.push_back(smethod->GetEfficiency("" , Types::kTesting, err)); + fResults.fEffAreas.push_back(smethod->GetEfficiency("", Types::kTesting, err)); fResults.fTrainEff01s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.01")); fResults.fTrainEff10s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.10")); fResults.fTrainEff30s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.30")); @@ -177,30 +177,28 @@ void TMVA::CrossValidation::Evaluate() classifier->fMethodsMap.clear(); return make_pair(res, workerID); - }; - vector> res; + }; + vector> res; - auto nWorkers = TMVA::gConfig().NWorkers(); + auto nWorkers = TMVA::gConfig().NWorkers(); - if(nWorkers > 1) { + if (nWorkers > 1) { ROOT::TProcessExecutor workers(nWorkers); res = workers.Map(workItem, ROOT::TSeqI(fNumFolds)); - } - - else { - for(UInt_t i = 0; i < fNumFolds; ++ i) { - auto res_pair = workItem(i); - res.push_back(res_pair); - } - } + } else { + for (UInt_t i = 0; i < fNumFolds; ++i) { + auto res_pair = workItem(i); + res.push_back(res_pair); + } + } - for(auto res_pair: res) { + for (auto res_pair : res) { fResults.fROCs[res_pair.second] = res_pair.first; - } + } - TMVA::gConfig().SetSilent(kFALSE); - Log() << kINFO << "Evaluation done." << Endl; - TMVA::gConfig().SetSilent(kTRUE); + TMVA::gConfig().SetSilent(kFALSE); + Log() << kINFO << "Evaluation done." << Endl; + TMVA::gConfig().SetSilent(kTRUE); } const TMVA::CrossValidationResult& TMVA::CrossValidation::GetResults() const { diff --git a/tmva/tmva/src/DataLoader.cxx b/tmva/tmva/src/DataLoader.cxx index 84ae52ab8bc36..db2ef30c944b8 100644 --- a/tmva/tmva/src/DataLoader.cxx +++ b/tmva/tmva/src/DataLoader.cxx @@ -856,83 +856,58 @@ void TMVA::DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src) for( std::vector::const_iterator treeinfo=src->DataInput().Sbegin();treeinfo!=src->DataInput().Send();treeinfo++) { - des->AddSignalTree((*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType()); + des->AddSignalTree((*treeinfo).GetTree(), (*treeinfo).GetWeight(), (*treeinfo).GetTreeType()); } for( std::vector::const_iterator treeinfo=src->DataInput().Bbegin();treeinfo!=src->DataInput().Bend();treeinfo++) { - des->AddBackgroundTree((*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType()); + des->AddBackgroundTree((*treeinfo).GetTree(), (*treeinfo).GetWeight(), (*treeinfo).GetTreeType()); } } std::vector> TMVA::DataLoaderCopyMP(TMVA::DataLoader *des, TMVA::DataLoader *src) { - - std::vector> vec_files; + std::vector> vec_files; std::map> map_files; - for(std::vector::const_iterator treeinfo = src->DataInput().Sbegin(); treeinfo != src->DataInput().Send(); ++ treeinfo) { - TTree *stree = treeinfo -> GetTree(); + for (std::vector::const_iterator treeinfo = src->DataInput().Sbegin(); treeinfo != src->DataInput().Send(); + ++treeinfo) { + TTree *stree = treeinfo->GetTree(); TString sfileName = stree->GetCurrentFile()->GetName(); std::shared_ptr sfile; - if(map_files.find(sfileName) == map_files.end()) { - sfile = std::shared_ptr(TFile::Open(sfileName)) ; + if (map_files.find(sfileName) == map_files.end()) { + sfile = std::shared_ptr(TFile::Open(sfileName)); map_files[sfileName] = sfile; vec_files.push_back(sfile); - } - else { + } else { sfile = map_files[sfileName]; } - TTree* signalTree = (TTree*)sfile->Get(stree->GetName()); + TTree *signalTree = (TTree *)sfile->Get(stree->GetName()); des->AddSignalTree(signalTree); } - for(std::vector::const_iterator treeinfo = src->DataInput().Bbegin(); treeinfo != src->DataInput().Bend(); ++ treeinfo) { - TTree *btree = treeinfo -> GetTree(); + for (std::vector::const_iterator treeinfo = src->DataInput().Bbegin(); treeinfo != src->DataInput().Bend(); + ++treeinfo) { + TTree *btree = treeinfo->GetTree(); TString bfileName = btree->GetCurrentFile()->GetName(); std::shared_ptr bfile; - if(map_files.find(bfileName) == map_files.end()) { - bfile = std::shared_ptr(TFile::Open(bfileName)) ; + if (map_files.find(bfileName) == map_files.end()) { + bfile = std::shared_ptr(TFile::Open(bfileName)); map_files[bfileName] = bfile; vec_files.push_back(bfile); - } - else { + } else { bfile = map_files[bfileName]; } - TTree* backgroundTree = (TTree*)bfile->Get(btree->GetName()); + TTree *backgroundTree = (TTree *)bfile->Get(btree->GetName()); des->AddBackgroundTree(backgroundTree); } - /*for(std::vector::const_iterator treeinfo_signal=src->DataInput().Sbegin(), - treeinfo_back=src->DataInput().Bbegin(); - treeinfo_signal!=src->DataInput().Send() | treeinfo_back!=src->DataInput().Bend(); - treeinfo_signal++, treeinfo_back++) { - TTree *stree = treeinfo_signal -> GetTree(); - TTree *btree = treeinfo_back -> GetTree(); - - TString sfileName = stree->GetCurrentFile()->GetName(); - TString bfileName = btree->GetCurrentFile()->GetName(); - std::shared_ptr sfile( TFile::Open(sfileName)) ; - std::shared_ptr bfile; - if (bfileName != sfileName) { - bfile = std::shared_ptr(TFile::Open(bfileName)); - } - else { - bfile = sfile; - } - TTree* signalTree = (TTree*)sfile->Get(stree->GetName()); - TTree* backgTree = (TTree*)bfile->Get(btree->GetName()); - des->AddSignalTree(signalTree); - des->AddBackgroundTree(backgTree); - - vec_files.push_back(sfile); - vec_files.push_back(bfile); -}*/ - return vec_files; -} -void TMVA::DataLoaderCopyMPCloseFiles(std::vector > files) { - for(auto file: files) { - file->Close(); - } + return vec_files; +} +void TMVA::DataLoaderCopyMPCloseFiles(std::vector> files) +{ + for (auto file : files) { + file->Close(); + } } //////////////////////////////////////////////////////////////////////////////// /// returns the correlation matrix of datasets diff --git a/tmva/tmva/src/HyperParameterOptimisation.cxx b/tmva/tmva/src/HyperParameterOptimisation.cxx index 348a699e3f4e5..17dc0a08bf618 100644 --- a/tmva/tmva/src/HyperParameterOptimisation.cxx +++ b/tmva/tmva/src/HyperParameterOptimisation.cxx @@ -66,12 +66,11 @@ void TMVA::HyperParameterOptimisationResult::Print() const } -TMVA::HyperParameterOptimisation::HyperParameterOptimisation(TMVA::DataLoader *dataloader):Envelope("HyperParameterOptimisation",dataloader), - fFomType("Separation"), - fFitType("Minuit"), - fNumFolds(4), - fResults(), - fClassifier(new TMVA::Factory("HyperParameterOptimisation","!V:!ROC:Silent:!ModelPersistence:!Color:!DrawProgressBar:AnalysisType=Classification")) +TMVA::HyperParameterOptimisation::HyperParameterOptimisation(TMVA::DataLoader *dataloader) + : Envelope("HyperParameterOptimisation", dataloader), fFomType("Separation"), fFitType("Minuit"), fNumFolds(4), + fResults(), fClassifier(new TMVA::Factory( + "HyperParameterOptimisation", + "!V:!ROC:Silent:!ModelPersistence:!Color:!DrawProgressBar:AnalysisType=Classification")) { fFoldStatus=kFALSE; } @@ -91,57 +90,49 @@ void TMVA::HyperParameterOptimisation::SetNumFolds(UInt_t i) void TMVA::HyperParameterOptimisation::Evaluate() { cout << "Number of Workers : " << TMVA::gConfig().NWorkers() << endl; - TString methodName = fMethod.GetValue("MethodName"); - TString methodTitle = fMethod.GetValue("MethodTitle"); - TString methodOptions = fMethod.GetValue("MethodOptions"); - - if(!fFoldStatus) - { - fDataLoader->MakeKFoldDataSet(fNumFolds); - fFoldStatus=kTRUE; - } - fResults.fMethodName = methodName; - auto workItem = [&](UInt_t workerID) { + TString methodName = fMethod.GetValue("MethodName"); + TString methodTitle = fMethod.GetValue("MethodTitle"); + TString methodOptions = fMethod.GetValue("MethodOptions"); - TString foldTitle = methodTitle; + if (!fFoldStatus) { + fDataLoader->MakeKFoldDataSet(fNumFolds); + fFoldStatus = kTRUE; + } + fResults.fMethodName = methodName; + auto workItem = [&](UInt_t workerID) { - foldTitle += "_opt"; - foldTitle += workerID+1; + TString foldTitle = methodTitle; - Event::SetIsTraining(kTRUE); - fDataLoader->PrepareFoldDataSet(workerID, TMVA::Types::kTraining); + foldTitle += "_opt"; + foldTitle += workerID + 1; - auto smethod = fClassifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); + Event::SetIsTraining(kTRUE); + fDataLoader->PrepareFoldDataSet(workerID, TMVA::Types::kTraining); - auto params=smethod->OptimizeTuningParameters(fFomType,fFitType); + auto smethod = fClassifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); - smethod->Data()->DeleteResults(smethod->GetMethodName(), Types::kTraining, Types::kClassification); + auto params = smethod->OptimizeTuningParameters(fFomType, fFitType); - fClassifier->DeleteAllMethods(); + smethod->Data()->DeleteResults(smethod->GetMethodName(), Types::kTraining, Types::kClassification); - fClassifier->fMethodsMap.clear(); + fClassifier->DeleteAllMethods(); - return params; + fClassifier->fMethodsMap.clear(); - }; - vector> res; - auto nWorkers = TMVA::gConfig().NWorkers(); - cout << "Number of Workers : " << TMVA::gConfig().NWorkers() << endl; - if(nWorkers > 1) { - cout << "I am here" << endl; - ROOT::TProcessExecutor workers(nWorkers); - cout << "Number of Workers : " << TMVA::gConfig().NWorkers() << endl; - res = workers.Map(workItem, ROOT::TSeqI(fNumFolds)); - cout << "Number of Workers : " << TMVA::gConfig().NWorkers() << endl; - } else { - for(UInt_t i = 0; i < fNumFolds; ++ i) { - res.push_back(workItem(i)); - } - } - cout << "Number of Workers : " << TMVA::gConfig().NWorkers() << endl; - for(auto results : res) { - fResults.fFoldParameters.push_back(results); - } - cout << "Number of Workers : " << TMVA::gConfig().NWorkers() << endl; + return params; + }; + vector> res; + auto nWorkers = TMVA::gConfig().NWorkers(); + if (nWorkers > 1) { + ROOT::TProcessExecutor workers(nWorkers); + res = workers.Map(workItem, ROOT::TSeqI(fNumFolds)); + } else { + for (UInt_t i = 0; i < fNumFolds; ++i) { + res.push_back(workItem(i)); + } + } + for (auto results : res) { + fResults.fFoldParameters.push_back(results); + } } diff --git a/tmva/tmva/src/VariableImportance.cxx b/tmva/tmva/src/VariableImportance.cxx index 0d08664cff686..2efd502e5efc3 100644 --- a/tmva/tmva/src/VariableImportance.cxx +++ b/tmva/tmva/src/VariableImportance.cxx @@ -190,195 +190,196 @@ TH1F* TMVA::VariableImportance::GetImportance(const UInt_t nbits,std::vector("MethodName"); - TString methodTitle = fMethod.GetValue("MethodTitle"); - TString methodOptions = fMethod.GetValue("MethodOptions"); - - uint32_t x = 0; - uint32_t y = 0; - //getting number of variables and variable names from loader - const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables(); - std::vector varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables(); - - ULong_t range = Sum(nbits); - - //vector to save importances - std::vector importances(nbits); - for (UInt_t i = 0; i < nbits; i++)importances[i] = 0; - - Float_t SROC, SSROC; //computed ROC value for every Seed and SubSeed - - x = range; - - std::bitset xbitset(x); - if (x == 0) Log()<AddVariable(varNames[index], 'F'); - } - - DataLoaderCopy(seeddl,fDataLoader.get()); - seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), - fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), - fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); - //Booking Seed - fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); - - //Train/Test/Evaluation - fClassifier->TrainAllMethods(); - fClassifier->TestAllMethods(); - fClassifier->EvaluateAllMethods(); - - //getting ROC - SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle); - - delete seeddl; - - fClassifier->DeleteAllMethods(); - fClassifier->fMethodsMap.clear(); - - auto workItem = [&](UInt_t workerID) { - uint32_t i = workerID; - if (x & (1 << i)) { - y = x & ~(1 << i); - std::bitset ybitset(y); - //need at least one variable - //NOTE: if subssed is zero then is the special case - //that count in xbitset is 1 - Double_t ny = log(x - y) / 0.693147; - if (y == 0) { + TString methodName = fMethod.GetValue("MethodName"); + TString methodTitle = fMethod.GetValue("MethodTitle"); + TString methodOptions = fMethod.GetValue("MethodOptions"); + + uint32_t x = 0; + uint32_t y = 0; + // getting number of variables and variable names from loader + const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables(); + std::vector varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables(); + + ULong_t range = Sum(nbits); + + // vector to save importances + std::vector importances(nbits); + for (UInt_t i = 0; i < nbits; i++) + importances[i] = 0; + + Float_t SROC, SSROC; // computed ROC value for every Seed and SubSeed + + x = range; + + std::bitset xbitset(x); + if (x == 0) + Log() << kFATAL << "Error: need at least one variable."; // dataloader need at least one variable + // creating loader for seed + TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string()); + + // adding variables from seed + for (UInt_t index = 0; index < nbits; index++) { + if (xbitset[index]) + seeddl->AddVariable(varNames[index], 'F'); + } + + DataLoaderCopy(seeddl, fDataLoader.get()); + + seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), + fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), + fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); + // Booking Seed + fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); + + // Train/Test/Evaluation + fClassifier->TrainAllMethods(); + fClassifier->TestAllMethods(); + fClassifier->EvaluateAllMethods(); + + // getting ROC + SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle); + + delete seeddl; + + fClassifier->DeleteAllMethods(); + fClassifier->fMethodsMap.clear(); + + auto workItem = [&](UInt_t workerID) { + uint32_t i = workerID; + if (x & (1 << i)) { + y = x & ~(1 << i); + std::bitset ybitset(y); + // need at least one variable + // NOTE: if subssed is zero then is the special case + // that count in xbitset is 1 + Double_t ny = log(x - y) / 0.693147; + if (y == 0) { return make_pair(ny, 0.5); - } - - //creating loader for subseed - TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string()); - - //adding variables from subseed - for (UInt_t index = 0; index < nbits; index++) { - if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F'); - } - - //Loading Dataset - std::vector> files = DataLoaderCopyMP(subseeddl,fDataLoader.get()); - subseeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), - fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), - fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); - //Booking SubSeed - fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions); - - //Train/Test/Evaluation - fClassifier->TrainAllMethods(); - fClassifier->TestAllMethods(); - fClassifier->EvaluateAllMethods(); - - //getting ROC - SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle); - //importances[ny] += SROC - SSROC; - - delete subseeddl; - fClassifier->DeleteAllMethods(); - fClassifier->fMethodsMap.clear(); - DataLoaderCopyMPCloseFiles(files); - - return make_pair((double)ny, (double)SSROC ); - } - else - return make_pair(-1., (double)0.); - }; - vector> results; - if(TMVA::gConfig().NWorkers() > 1) { - ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); - results = workers.Map(workItem, ROOT::TSeqI(32)); - } - else { - for (int i = 0; i < 32; ++i) { - auto res = workItem(i); - results.push_back(res); - } - } - for (auto res_pair : results) { - if (res_pair.first >= 0) - importances[res_pair.first] += SROC - res_pair.second; - } - - Float_t normalization = 0.0; - for (UInt_t i = 0; i < nbits; i++) normalization += importances[i]; - - for(UInt_t i = 0; i < nbits; i++){ - //adding values - fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization); - //adding sufix - fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue(varNames[i])+" % "; + } + + // creating loader for subseed + TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string()); + + // adding variables from subseed + for (UInt_t index = 0; index < nbits; index++) { + if (ybitset[index]) + subseeddl->AddVariable(varNames[index], 'F'); + } + + // Loading Dataset + std::vector> files = DataLoaderCopyMP(subseeddl, fDataLoader.get()); + subseeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), + fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), + fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); + // Booking SubSeed + fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions); + + // Train/Test/Evaluation + fClassifier->TrainAllMethods(); + fClassifier->TestAllMethods(); + fClassifier->EvaluateAllMethods(); + + // getting ROC + SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle); + // importances[ny] += SROC - SSROC; + + delete subseeddl; + fClassifier->DeleteAllMethods(); + fClassifier->fMethodsMap.clear(); + DataLoaderCopyMPCloseFiles(files); + + return make_pair((double)ny, (double)SSROC); + } else + return make_pair(-1., (double)0.); + }; + vector> results; + if (TMVA::gConfig().NWorkers() > 1) { + ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); + results = workers.Map(workItem, ROOT::TSeqI(32)); + } else { + for (int i = 0; i < 32; ++i) { + auto res = workItem(i); + results.push_back(res); } - fResults.fImportanceHist = std::shared_ptr(GetImportance(nbits,importances,varNames)); + } + for (auto res_pair : results) { + if (res_pair.first >= 0) + importances[res_pair.first] += SROC - res_pair.second; + } + Float_t normalization = 0.0; + for (UInt_t i = 0; i < nbits; i++) + normalization += importances[i]; + + for (UInt_t i = 0; i < nbits; i++) { + // adding values + fResults.fImportanceValues[varNames[i]] = (100.0 * importances[i] / normalization); + // adding sufix + fResults.fImportanceValues[varNames[i]] = fResults.fImportanceValues.GetValue(varNames[i]) + " % "; + } + fResults.fImportanceHist = std::shared_ptr(GetImportance(nbits, importances, varNames)); } //////////////////////////////////////////////////////////////////////////////// void TMVA::VariableImportance::EvaluateImportanceRandom(UInt_t seeds) { - TString methodName = fMethod.GetValue("MethodName"); - TString methodTitle = fMethod.GetValue("MethodTitle"); - TString methodOptions = fMethod.GetValue("MethodOptions"); + TString methodName = fMethod.GetValue("MethodName"); + TString methodTitle = fMethod.GetValue("MethodTitle"); + TString methodOptions = fMethod.GetValue("MethodOptions"); - TRandom3 *rangen = new TRandom3(0); //Random Gen. + TRandom3 *rangen = new TRandom3(0); // Random Gen. - uint32_t y = 0; + uint32_t y = 0; - //getting number of variables and variable names from loader - const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables(); - std::vector varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables(); + // getting number of variables and variable names from loader + const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables(); + std::vector varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables(); - ULong_t range = pow(2, nbits); + ULong_t range = pow(2, nbits); - //vector to save importances - std::vector importances(nbits); + // vector to save importances + std::vector importances(nbits); - for (UInt_t i = 0; i < nbits; i++)importances[i] = 0; + for (UInt_t i = 0; i < nbits; i++) + importances[i] = 0; - Float_t SROC, SSROC; //computed ROC value for every Seed and SubSeed + Float_t SROC, SSROC; // computed ROC value for every Seed and SubSeed - std::unordered_mapused; - auto workItem = [&](UInt_t workerID) { + std::unordered_map used; + auto workItem = [&](UInt_t workerID) { - while(true) { - workerID = rangen -> Integer(range); - if(!used[workerID] && workerID != 0) break; - } - std::bitset xbitset(workerID); //dataloader need at least one variable + while (true) { + workerID = rangen->Integer(range); + if (!used[workerID] && workerID != 0) + break; + } + std::bitset xbitset(workerID); // dataloader need at least one variable - used[workerID] = 1; - // creating loader for seed + used[workerID] = 1; + // creating loader for seed TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string()); - // adding variables from seed + // adding variables from seed for (UInt_t index = 0; index < nbits; index++) { - // if(xbitset[index]) - // std::cout << varNames[index] << " "; - if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F'); + if (xbitset[index]) + seeddl->AddVariable(varNames[index], 'F'); } // Loading Dataset + std::vector> files = DataLoaderCopyMP(seeddl, fDataLoader.get()); - std::vector> files = DataLoaderCopyMP(seeddl,fDataLoader.get()); - - seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), + seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); - // Booking Seed + // Booking Seed fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); - // Train/Test/Evaluation + // Train/Test/Evaluation fClassifier->TrainAllMethods(); fClassifier->TestAllMethods(); fClassifier->EvaluateAllMethods(); - //getting ROC + // getting ROC SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle); delete seeddl; @@ -387,214 +388,202 @@ void TMVA::VariableImportance::EvaluateImportanceRandom(UInt_t seeds) fClassifier->fMethodsMap.clear(); DataLoaderCopyMPCloseFiles(files); - return make_pair(SROC, workerID); - }; - - vector> SROC_results; - ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); - - // Fill the pool with work - if(TMVA::gConfig().NWorkers() > 1) { - SROC_results = workers.Map(workItem, ROOT::TSeqI(std::min(range - 1, ULong_t(seeds)))); - } else { - - for(UInt_t i = 0; i < std::min(range - 1, ULong_t(seeds)); ++ i) { - auto res = workItem(i); - SROC_results.push_back(res); - } - } - - - for(auto res: SROC_results) { - auto xx = res.second; - auto SROC_ = res.first; - auto workItemsub = [&](UInt_t workerIDsub) { - uint32_t i = workerIDsub; - if (xx & (1 << i)) { - y = xx & ~(1 << i); - - std::bitset ybitset(y); - //need at least one variable - //NOTE: if subssed is zero then is the special case - //that count in xbitset is 1 - Double_t ny = log(xx - y) / 0.693147; - if (y == 0) { - return make_pair(ny, .5); - } - - //creating loader for subseed - TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string()); - - //adding variables from subseed - for (UInt_t index = 0; index < nbits; index++) { - if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F'); - } - - //Loading Dataset - std::vector< std::shared_ptr > files = DataLoaderCopyMP(subseeddl,fDataLoader.get()); - subseeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), - fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), - fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); - //Booking SubSeed - fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions); - - //Train/Test/Evaluation - fClassifier->TrainAllMethods(); - fClassifier->TestAllMethods(); - fClassifier->EvaluateAllMethods(); - - //getting ROC - SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle); - //importance += SROC_ - SSROC; - - delete subseeddl; - - fClassifier->DeleteAllMethods(); - fClassifier->fMethodsMap.clear(); - DataLoaderCopyMPCloseFiles(files); - - return make_pair((double)ny, (double)SSROC); - } else - return make_pair(-1.,(double)0.); - }; - vector> results; - if(TMVA::gConfig().NWorkers() > 1) { - ROOT::TProcessExecutor workers_sub(TMVA::gConfig().NWorkers()); - - // Fill the pool with work - results = workers_sub.Map(workItemsub, ROOT::TSeqI(32)); - } else { - for (int i = 0; i < 32; ++ i) { - auto res_sub = workItemsub(i); - results.push_back(res_sub); - } - } - for (auto res_pair : results) { - importances[res_pair.first] += SROC_-res_pair.second; - } - } + return make_pair(SROC, workerID); + }; - Float_t normalization = 0.0; - for (UInt_t i = 0; i < nbits; i++) normalization += importances[i]; + vector> SROC_results; + ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); - for(UInt_t i=0;i(varNames[i])+" % "; - } - fResults.fImportanceHist = std::shared_ptr(GetImportance(nbits,importances,varNames)); - delete rangen; + // Fill the pool with work + if (TMVA::gConfig().NWorkers() > 1) { + SROC_results = workers.Map(workItem, ROOT::TSeqI(std::min(range - 1, ULong_t(seeds)))); + } else { + for (UInt_t i = 0; i < std::min(range - 1, ULong_t(seeds)); ++i) { + auto res = workItem(i); + SROC_results.push_back(res); + } + } + + for (auto res : SROC_results) { + auto xx = res.second; + auto SROC_ = res.first; + auto workItemsub = [&](UInt_t workerIDsub) { + uint32_t i = workerIDsub; + if (xx & (1 << i)) { + std::bitset ybitset(y); + // need at least one variable + // NOTE: if subssed is zero then is the special case + // that count in xbitset is 1 + Double_t ny = log(xx - y) / 0.693147; + if (y == 0) { + return make_pair(ny, .5); + } + //creating loader for subseed + TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string()); + + //adding variables from subseed + for (UInt_t index = 0; index < nbits; index++) { + if (ybitset[index]) + subseeddl->AddVariable(varNames[index], 'F'); + } + // Loading Dataset + std::vector> files = DataLoaderCopyMP(subseeddl, fDataLoader.get()); + subseeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), + fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), + fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); + // Booking SubSeed + fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions); + + // Train/Test/Evaluation + fClassifier->TrainAllMethods(); + fClassifier->TestAllMethods(); + fClassifier->EvaluateAllMethods(); + + // getting ROC + SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle); + delete subseeddl; + + fClassifier->DeleteAllMethods(); + fClassifier->fMethodsMap.clear(); + DataLoaderCopyMPCloseFiles(files); + return make_pair((double)ny, (double)SSROC); + } else + return make_pair(-1., (double)0.); + }; + vector> results; + if (TMVA::gConfig().NWorkers() > 1) { + ROOT::TProcessExecutor workers_sub(TMVA::gConfig().NWorkers()); + // Fill the pool with work + results = workers_sub.Map(workItemsub, ROOT::TSeqI(32)); + } else { + for (int i = 0; i < 32; ++i) { + auto res_sub = workItemsub(i); + results.push_back(res_sub); + } + } + for (auto res_pair : results) { + importances[res_pair.first] += SROC_ - res_pair.second; + } + } + + Float_t normalization = 0.0; + for (UInt_t i = 0; i < nbits; i++) + normalization += importances[i]; + + for (UInt_t i = 0; i < nbits; i++) { + // adding values + fResults.fImportanceValues[varNames[i]] = (100.0 * importances[i] / normalization); + // adding sufix + fResults.fImportanceValues[varNames[i]] = fResults.fImportanceValues.GetValue(varNames[i]) + " % "; + } + fResults.fImportanceHist = std::shared_ptr(GetImportance(nbits, importances, varNames)); + delete rangen; } //////////////////////////////////////////////////////////////////////////////// void TMVA::VariableImportance::EvaluateImportanceAll() { + TString methodName = fMethod.GetValue("MethodName"); + TString methodTitle = fMethod.GetValue("MethodTitle"); + TString methodOptions = fMethod.GetValue("MethodOptions"); - TString methodName = fMethod.GetValue("MethodName"); - TString methodTitle = fMethod.GetValue("MethodTitle"); - TString methodOptions = fMethod.GetValue("MethodOptions"); + uint32_t x = 0; + uint32_t y = 0; + + // getting number of variables and variable names from loader + const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables(); + std::vector varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables(); + + ULong_t range = pow(2, nbits); + + // vector to save importances + std::vector importances(nbits); + + for (UInt_t i = 0; i < nbits; i++) + importances[i] = 0; - uint32_t x = 0; - uint32_t y = 0; - - //getting number of variables and variable names from loader - const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables(); - std::vector varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables(); - - ULong_t range = pow(2, nbits); - - //vector to save importances - std::vector importances(nbits); - - for (UInt_t i = 0; i < nbits; i++) importances[i] = 0; - - Float_t SROC, SSROC; //computed ROC value - - - auto workItem = [&](UInt_t workerID) { - - Float_t ROC; - ROC = 0.5; - std::bitset xbitset(workerID); - - if(workerID == 0) return ROC; - // creating loader for seed - TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string()); - - // adding variables from seed - for (UInt_t index = 0; index < nbits; index++) { - if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F'); - } - - std::vector> files = DataLoaderCopyMP(seeddl,fDataLoader.get()); - seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), - fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), - fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); - - TMVA::gConfig().SetSilent(kFALSE); - auto classifier =std::unique_ptr( - new TMVA::Factory("VariableImportanceworker", - "!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification")); - classifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); - // Train/Test/Evaluation - // Booking Seed - - classifier->TrainAllMethods(); - classifier->TestAllMethods(); - classifier->EvaluateAllMethods(); - - // getting ROC - ROC = classifier->GetROCIntegral(xbitset.to_string(), methodTitle); - - delete seeddl; - classifier->DeleteAllMethods(); - classifier->fMethodsMap.clear(); - DataLoaderCopyMPCloseFiles(files); - return ROC; - }; - - vectorROC_result; - if(TMVA::gConfig().NWorkers() > 1) { - ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); - - // Fill the pool with work - ROC_result = workers.Map(workItem, ROOT::TSeqI(range)); - } else { - for(UInt_t i = 0; i < range; ++ i) { - auto res = workItem(i); - ROC_result.push_back(res); - } - } - for ( x = 0; x ybitset(y); - - Float_t ny = log(x - y) / 0.693147; - if (y == 0) { - importances[ny] = SROC - 0.5; - continue; - } - - // getting ROC - SSROC = ROC_result[y]; - importances[ny] += SROC - SSROC; + Float_t SROC, SSROC; // computed ROC value + + auto workItem = [&](UInt_t workerID) { + Float_t ROC; + ROC = 0.5; + std::bitset xbitset(workerID); + + if (workerID == 0) + return ROC; + // creating loader for seed + TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string()); + // adding variables from seed + for (UInt_t index = 0; index < nbits; index++) { + if (xbitset[index]) + seeddl->AddVariable(varNames[index], 'F'); + } + std::vector> files = DataLoaderCopyMP(seeddl, fDataLoader.get()); + seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), + fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), + fDataLoader->GetDefaultDataSetInfo().GetSplitOptions()); + + TMVA::gConfig().SetSilent(kFALSE); + auto classifier = std::unique_ptr( + new TMVA::Factory("VariableImportanceworker", + "!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification")); + classifier->BookMethod(seeddl, methodName, methodTitle, methodOptions); + + classifier->TrainAllMethods(); + classifier->TestAllMethods(); + classifier->EvaluateAllMethods(); + // getting ROC + ROC = classifier->GetROCIntegral(xbitset.to_string(), methodTitle); + + delete seeddl; + classifier->DeleteAllMethods(); + classifier->fMethodsMap.clear(); + DataLoaderCopyMPCloseFiles(files); + return ROC; + }; + + vector ROC_result; + if (TMVA::gConfig().NWorkers() > 1) { + ROOT::TProcessExecutor workers(TMVA::gConfig().NWorkers()); + // Fill the pool with work + ROC_result = workers.Map(workItem, ROOT::TSeqI(range)); + } else { + for (UInt_t i = 0; i < range; ++i) { + auto res = workItem(i); + ROC_result.push_back(res); + } + } + for (x = 0; x < range; x++) { + SROC = ROC_result[x]; + for (uint32_t i = 0; i < NBITS; ++i) { + if (x & (1 << i)) { + y = x & ~(1 << i); + std::bitset ybitset(y); + + Float_t ny = log(x - y) / 0.693147; + if (y == 0) { + importances[ny] = SROC - 0.5; + continue; } - } - } - Float_t normalization = 0.0; - for (UInt_t i = 0; i < nbits; i++) normalization += importances[i]; - for(UInt_t i=0;i(varNames[i])+" % "; + // getting ROC + SSROC = ROC_result[y]; + importances[ny] += SROC - SSROC; + } } - fResults.fImportanceHist = std::shared_ptr(GetImportance(nbits,importances,varNames)); + } + Float_t normalization = 0.0; + for (UInt_t i = 0; i < nbits; i++) + normalization += importances[i]; + + for (UInt_t i = 0; i < nbits; i++) { + // adding values + fResults.fImportanceValues[varNames[i]] = (100.0 * importances[i] / normalization); + // adding sufix + fResults.fImportanceValues[varNames[i]] = fResults.fImportanceValues.GetValue(varNames[i]) + " % "; + } + + fResults.fImportanceHist = std::shared_ptr(GetImportance(nbits, importances, varNames)); }