-
Notifications
You must be signed in to change notification settings - Fork 1.4k
TMVA MultiProcessing #858
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TMVA MultiProcessing #858
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,12 +33,13 @@ | |
| #include <vector> | ||
| #include <map> | ||
| #include "TCut.h" | ||
| #include <memory> | ||
|
|
||
| #include "TMVA/Factory.h" | ||
| #include "TMVA/Types.h" | ||
| #include "TMVA/DataSet.h" | ||
| #include "TFile.h" | ||
|
|
||
| class TFile; | ||
| class TTree; | ||
| class TDirectory; | ||
| class TH2; | ||
|
|
@@ -84,16 +85,15 @@ namespace TMVA { | |
| // special case: signal/background | ||
|
|
||
| // Data input related | ||
| void SetInputTrees( const TString& signalFileName, const TString& backgroundFileName, | ||
| void SetInputTrees(const TString& signalFileName, const TString& backgroundFileName, | ||
| Double_t signalWeight=1.0, Double_t backgroundWeight=1.0 ); | ||
| void SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut ); | ||
| void SetInputTrees( TTree *inputTree, const TCut& SigCut, const TCut& BgCut ); | ||
| // Set input trees at once | ||
| void SetInputTrees( TTree* signal, TTree* background, | ||
| Double_t signalWeight=1.0, Double_t backgroundWeight=1.0) ; | ||
| void SetInputTrees( TTree *signal, TTree* background, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0) ; | ||
|
|
||
| void AddSignalTree( TTree* signal, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType ); | ||
| void AddSignalTree( TTree *signal, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType ); | ||
| void AddSignalTree( TString datFileS, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType ); | ||
| void AddSignalTree( TTree* signal, Double_t weight, const TString& treetype ); | ||
| void AddSignalTree( TTree *signal, Double_t weight, const TString& treetype ); | ||
|
|
||
| // ... deprecated, kept for backwards compatibility | ||
| void SetSignalTree( TTree* signal, Double_t weight=1.0); | ||
|
|
@@ -109,9 +109,9 @@ namespace TMVA { | |
| void SetBackgroundWeightExpression( const TString& variable ); | ||
|
|
||
| // special case: regression | ||
| void AddRegressionTree( TTree* tree, Double_t weight = 1.0, | ||
| Types::ETreeType treetype = Types::kMaxTreeType ) { | ||
| AddTree( tree, "Regression", weight, "", treetype ); | ||
| void AddRegressionTree( TTree* tree, Double_t weight = 1.0, | ||
| Types::ETreeType treetype = Types::kMaxTreeType ) { | ||
| AddTree( tree, "Regression", weight, "", treetype ); | ||
| } | ||
|
|
||
| // general | ||
|
|
@@ -153,10 +153,10 @@ namespace TMVA { | |
| void PrepareTrainingAndTestTree( const TCut& cut, const TString& splitOpt ); | ||
| void PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt ); | ||
|
|
||
| // ... deprecated, kept for backwards compatibility | ||
| // ... deprecated, kept for backwards compatibility | ||
| void PrepareTrainingAndTestTree( const TCut& cut, Int_t Ntrain, Int_t Ntest = -1 ); | ||
|
|
||
| void PrepareTrainingAndTestTree( const TCut& cut, Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest, | ||
| void PrepareTrainingAndTestTree( const TCut& cut, Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest, | ||
| const TString& otherOpt="SplitMode=Random:!V" ); | ||
|
|
||
| void PrepareTrainingAndTestTree( int foldNumber, Types::ETreeType tt ); | ||
|
|
@@ -168,15 +168,15 @@ namespace TMVA { | |
| const DataSetInfo& GetDefaultDataSetInfo(){ return DefaultDataSetInfo(); } | ||
|
|
||
| TH2* GetCorrelationMatrix(const TString& className); | ||
|
|
||
| //Copy method use in VI and CV DEPRECATED: you can just call Clone DataLoader *dl2=(DataLoader *)dl1->Clone("dl2") | ||
| DataLoader* MakeCopy(TString name); | ||
| friend void DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src); | ||
| friend void DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src); | ||
| DataInputHandler& DataInput() { return *fDataInputHandler; } | ||
|
|
||
| private: | ||
|
|
||
|
|
||
| DataSetInfo& DefaultDataSetInfo(); | ||
| void SetInputTreesFromEventAssignTrees(); | ||
|
|
||
|
|
@@ -188,7 +188,7 @@ namespace TMVA { | |
|
|
||
| DataSetManager* fDataSetManager; // DSMTEST | ||
|
|
||
|
|
||
| DataInputHandler* fDataInputHandler;//-> | ||
|
|
||
| std::vector<TMVA::VariableTransformBase*> fDefaultTrfs; // list of transformations on default DataSet | ||
|
|
@@ -199,7 +199,7 @@ namespace TMVA { | |
| Bool_t fVerbose; // verbose mode | ||
|
|
||
| // flag determining the way training and test data are assigned to DataLoader | ||
| enum DataAssignType { kUndefined = 0, | ||
| enum DataAssignType { kUndefined = 0, | ||
| kAssignTrees, | ||
| kAssignEvents }; | ||
| DataAssignType fDataAssignType; // flags for data assigning | ||
|
|
@@ -216,7 +216,7 @@ namespace TMVA { | |
| Int_t fATreeType = 0; // type of event (=classIndex) | ||
| Float_t fATreeWeight = 0.0; // weight of the event | ||
| std::vector<Float_t> fATreeEvent; // event variables | ||
|
|
||
| Types::EAnalysisType fAnalysisType; // the training type | ||
|
|
||
| Bool_t fMakeFoldDataSet; // flag telling if the DataSet folds have been done | ||
|
|
@@ -226,7 +226,8 @@ namespace TMVA { | |
| ClassDef(DataLoader,3); | ||
| }; | ||
| void DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src); | ||
| std::vector<std::shared_ptr<TFile>> DataLoaderCopyMP(TMVA::DataLoader *des, TMVA::DataLoader *src); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do these need to be public? Would it be better to use private with Envelope as a friend? I think the intention is that only the Envelope should be able to use it. I would then propose that we enforce this. We can always open the interface up later, but not the other way around.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @mammadhajili could you answer the comments of @ashlaban please? |
||
| void DataLoaderCopyMPCloseFiles(std::vector<std::shared_ptr<TFile>> files); | ||
| } // namespace TMVA | ||
|
|
||
| #endif | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,7 @@ | |
| #include "TMVA/ResultsClassification.h" | ||
| #include "TMVA/tmvaglob.h" | ||
| #include "TMVA/Types.h" | ||
| #include "ROOT/TProcessExecutor.hxx" | ||
|
|
||
| #include "TSystem.h" | ||
| #include "TAxis.h" | ||
|
|
@@ -20,6 +21,7 @@ | |
|
|
||
| #include <iostream> | ||
| #include <memory> | ||
| using namespace std; | ||
|
|
||
| /*! \class TMVA::CrossValidationResult | ||
| \ingroup TMVA | ||
|
|
@@ -126,16 +128,17 @@ void TMVA::CrossValidation::Evaluate() | |
| fFoldStatus=kTRUE; | ||
| } | ||
|
|
||
| // Process K folds | ||
| for(UInt_t i=0; i<fNumFolds; ++i){ | ||
| Log() << kDEBUG << "Fold (" << methodTitle << "): " << i << Endl; | ||
| auto workItem = [&](UInt_t workerID) { | ||
|
|
||
| Log() << kDEBUG << "Fold (" << methodTitle << "): " << workerID << Endl; | ||
| // Get specific fold of dataset and setup method | ||
| TString foldTitle = methodTitle; | ||
| foldTitle += "_fold"; | ||
| foldTitle += i+1; | ||
|
|
||
| fDataLoader->PrepareFoldDataSet(i, TMVA::Types::kTesting); | ||
| MethodBase* smethod = fClassifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); | ||
| foldTitle += workerID + 1; | ||
| auto classifier = std::unique_ptr<Factory>(new TMVA::Factory( | ||
| "CrossValidation","!V:!ROC:Silent:!ModelPersistence:!Color:!DrawProgressBar:AnalysisType=Classification")); | ||
| fDataLoader->PrepareFoldDataSet(workerID, TMVA::Types::kTesting); | ||
| MethodBase *smethod = classifier->BookMethod(fDataLoader.get(), methodName, methodTitle, methodOptions); | ||
|
|
||
| // Train method | ||
| Event::SetIsTraining(kTRUE); | ||
|
|
@@ -147,10 +150,10 @@ void TMVA::CrossValidation::Evaluate() | |
| smethod->TestClassification(); | ||
|
|
||
| // Store results | ||
| fResults.fROCs[i] = fClassifier->GetROCIntegral(fDataLoader->GetName(),methodTitle); | ||
| auto res = classifier->GetROCIntegral(fDataLoader->GetName(), methodTitle); | ||
|
|
||
| TGraph* gr = fClassifier->GetROCCurve(fDataLoader->GetName(), methodTitle, true); | ||
| gr->SetLineColor(i+1); | ||
| TGraph* gr = classifier->GetROCCurve(fDataLoader->GetName(), methodTitle, true); | ||
| gr->SetLineColor(workerID + 1); | ||
| gr->SetLineWidth(2); | ||
| gr->SetTitle(foldTitle.Data()); | ||
| fResults.fROCCurves->Add(gr); | ||
|
|
@@ -159,24 +162,45 @@ void TMVA::CrossValidation::Evaluate() | |
| fResults.fSeps.push_back(smethod->GetSeparation()); | ||
|
|
||
| Double_t err; | ||
| fResults.fEff01s.push_back(smethod->GetEfficiency("Efficiency:0.01",Types::kTesting, err)); | ||
| fResults.fEff10s.push_back(smethod->GetEfficiency("Efficiency:0.10",Types::kTesting,err)); | ||
| fResults.fEff30s.push_back(smethod->GetEfficiency("Efficiency:0.30",Types::kTesting,err)); | ||
| fResults.fEffAreas.push_back(smethod->GetEfficiency("" ,Types::kTesting,err)); | ||
| fResults.fEff01s.push_back(smethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err)); | ||
| fResults.fEff10s.push_back(smethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err)); | ||
| fResults.fEff30s.push_back(smethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err)); | ||
| fResults.fEffAreas.push_back(smethod->GetEfficiency("" , Types::kTesting, err)); | ||
| fResults.fTrainEff01s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.01")); | ||
| fResults.fTrainEff10s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.10")); | ||
| fResults.fTrainEff30s.push_back(smethod->GetTrainingEfficiency("Efficiency:0.30")); | ||
|
|
||
| // Clean-up for this fold | ||
| smethod->Data()->DeleteResults(smethod->GetMethodName(), Types::kTesting, Types::kClassification); | ||
| smethod->Data()->DeleteResults(smethod->GetMethodName(), Types::kTraining, Types::kClassification); | ||
| fClassifier->DeleteAllMethods(); | ||
| fClassifier->fMethodsMap.clear(); | ||
| } | ||
|
|
||
| TMVA::gConfig().SetSilent(kFALSE); | ||
| Log() << kINFO << "Evaluation done." << Endl; | ||
| TMVA::gConfig().SetSilent(kTRUE); | ||
| classifier->DeleteAllMethods(); | ||
| classifier->fMethodsMap.clear(); | ||
|
|
||
| return make_pair(res, workerID); | ||
| }; | ||
| vector<pair<double, UInt_t>> res; | ||
|
|
||
| auto nWorkers = TMVA::gConfig().NWorkers(); | ||
|
|
||
| if(nWorkers > 1) { | ||
| ROOT::TProcessExecutor workers(nWorkers); | ||
| res = workers.Map(workItem, ROOT::TSeqI(fNumFolds)); | ||
| } | ||
|
|
||
| else { | ||
|
||
| for(UInt_t i = 0; i < fNumFolds; ++ i) { | ||
| auto res_pair = workItem(i); | ||
| res.push_back(res_pair); | ||
| } | ||
| } | ||
|
|
||
| for(auto res_pair: res) { | ||
| fResults.fROCs[res_pair.second] = res_pair.first; | ||
| } | ||
|
|
||
| TMVA::gConfig().SetSilent(kFALSE); | ||
| Log() << kINFO << "Evaluation done." << Endl; | ||
| TMVA::gConfig().SetSilent(kTRUE); | ||
| } | ||
|
|
||
| const TMVA::CrossValidationResult& TMVA::CrossValidation::GetResults() const { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we also export the DataLoaderMP copy here?