From 7e14e1625d3ea3383d6b27ff24ad5e50c29a950c Mon Sep 17 00:00:00 2001 From: ashlaban Date: Fri, 12 May 2017 19:05:10 +0200 Subject: [PATCH 1/2] Add multiclass ROC curves for TMVAMulticlassGui Introduces a new (for multiclass anyway) button in the gui that when clicked displays one ROC curve per class. Each curve contains the performance of all methods for that class. Uses the new ROCCurve class to calculate the curves. --- tmva/tmva/inc/TMVA/ResultsMulticlass.h | 1 + tmva/tmva/src/Factory.cxx | 298 +++++++++--------- tmva/tmva/src/MethodBase.cxx | 1 + tmva/tmva/src/ResultsMulticlass.cxx | 61 ++++ tmva/tmvagui/CMakeLists.txt | 2 +- tmva/tmvagui/Module.mk | 2 +- tmva/tmvagui/inc/LinkDef.h | 3 +- .../tmvagui/inc/TMVA/efficienciesMulticlass.h | 25 ++ tmva/tmvagui/src/TMVAMultiClassGui.cxx | 84 ++--- tmva/tmvagui/src/efficienciesMulticlass.cxx | 291 +++++++++++++++++ 10 files changed, 577 insertions(+), 191 deletions(-) create mode 100644 tmva/tmvagui/inc/TMVA/efficienciesMulticlass.h create mode 100644 tmva/tmvagui/src/efficienciesMulticlass.cxx diff --git a/tmva/tmva/inc/TMVA/ResultsMulticlass.h b/tmva/tmva/inc/TMVA/ResultsMulticlass.h index 09697ca7360b0..f61f15699c240 100644 --- a/tmva/tmva/inc/TMVA/ResultsMulticlass.h +++ b/tmva/tmva/inc/TMVA/ResultsMulticlass.h @@ -76,6 +76,7 @@ namespace TMVA { std::vector& GetAchievableEff(){return fAchievableEff;} std::vector& GetAchievablePur(){return fAchievablePur;} // histogramming + void CreateMulticlassPerformanceHistos(TString prefix); void CreateMulticlassHistos( TString prefix, Int_t nbins, Int_t nbins_high); Double_t EstimatorFunction( std::vector & ); diff --git a/tmva/tmva/src/Factory.cxx b/tmva/tmva/src/Factory.cxx index f15399cf823d9..0507a22f4f80e 100644 --- a/tmva/tmva/src/Factory.cxx +++ b/tmva/tmva/src/Factory.cxx @@ -1303,58 +1303,59 @@ void TMVA::Factory::EvaluateAllMethods( void ) theMethod->WriteEvaluationHistosToFile(Types::kTesting); theMethod->WriteEvaluationHistosToFile(Types::kTraining); } - } - else 
if (theMethod->DoMulticlass()) { - doMulticlass = kTRUE; - Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl; - if(!IsSilentFile()) - { - Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl; - theMethod->WriteEvaluationHistosToFile(Types::kTesting); - theMethod->WriteEvaluationHistosToFile(Types::kTraining); - } - theMethod->TestMulticlass(); - multiclass_testEff.push_back(theMethod->GetMulticlassEfficiency(multiclass_testPur)); - - nmeth_used[0]++; - mname[0].push_back( theMethod->GetMethodName() ); - } - else { - - Log() << kHEADER << "Evaluate classifier: " << theMethod->GetMethodName() << Endl << Endl; - isel = (theMethod->GetMethodTypeName().Contains("Variable")) ? 1 : 0; - - // perform the evaluation - theMethod->TestClassification(); - - - // evaluate the classifier - mname[isel].push_back( theMethod->GetMethodName() ); - sig[isel].push_back ( theMethod->GetSignificance() ); - sep[isel].push_back ( theMethod->GetSeparation() ); - roc[isel].push_back ( theMethod->GetROCIntegral() ); - - Double_t err; - eff01[isel].push_back( theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err) ); - eff01err[isel].push_back( err ); - eff10[isel].push_back( theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err) ); - eff10err[isel].push_back( err ); - eff30[isel].push_back( theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err) ); - eff30err[isel].push_back( err ); - effArea[isel].push_back( theMethod->GetEfficiency("", Types::kTesting, err) ); // computes the area (average) - - trainEff01[isel].push_back( theMethod->GetTrainingEfficiency("Efficiency:0.01") ); // the first pass takes longer - trainEff10[isel].push_back( theMethod->GetTrainingEfficiency("Efficiency:0.10") ); - trainEff30[isel].push_back( theMethod->GetTrainingEfficiency("Efficiency:0.30") ); - - nmeth_used[isel]++; + } else if (theMethod->DoMulticlass()) { + // 
==================================================================== + // === Multiclass evaluation + // ==================================================================== + doMulticlass = kTRUE; + Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl; + + theMethod->TestMulticlass(); + multiclass_testEff.push_back(theMethod->GetMulticlassEfficiency(multiclass_testPur)); + + // FIXME: This code snippet is repeated in other branches + if (not IsSilentFile()) { + Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl; + theMethod->WriteEvaluationHistosToFile(Types::kTesting); + theMethod->WriteEvaluationHistosToFile(Types::kTraining); + } - if(!IsSilentFile()) - { - Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl; - theMethod->WriteEvaluationHistosToFile(Types::kTesting); - theMethod->WriteEvaluationHistosToFile(Types::kTraining); - } + nmeth_used[0]++; + mname[0].push_back(theMethod->GetMethodName()); + } else { + + Log() << kHEADER << "Evaluate classifier: " << theMethod->GetMethodName() << Endl << Endl; + isel = (theMethod->GetMethodTypeName().Contains("Variable")) ? 
1 : 0; + + // perform the evaluation + theMethod->TestClassification(); + + // evaluate the classifier + mname[isel].push_back(theMethod->GetMethodName()); + sig[isel].push_back(theMethod->GetSignificance()); + sep[isel].push_back(theMethod->GetSeparation()); + roc[isel].push_back(theMethod->GetROCIntegral()); + + Double_t err; + eff01[isel].push_back(theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err)); + eff01err[isel].push_back(err); + eff10[isel].push_back(theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err)); + eff10err[isel].push_back(err); + eff30[isel].push_back(theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err)); + eff30err[isel].push_back(err); + effArea[isel].push_back(theMethod->GetEfficiency("", Types::kTesting, err)); // computes the area (average) + + trainEff01[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.01")); // the first pass takes longer + trainEff10[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.10")); + trainEff30[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.30")); + + nmeth_used[isel]++; + + if (!IsSilentFile()) { + Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl; + theMethod->WriteEvaluationHistosToFile(Types::kTesting); + theMethod->WriteEvaluationHistosToFile(Types::kTraining); + } } } if (doRegression) { @@ -1687,104 +1688,109 @@ void TMVA::Factory::EvaluateAllMethods( void ) Log() << kINFO << hLine << Endl; Log() << kINFO << Endl; } - } - else { - if(fROC) - { - Log().EnableOutput(); - gConfig().SetSilent(kFALSE); - Log() << Endl; - TString hLine = "-------------------------------------------------------------------------------------------------------------------"; - Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl; - Log() << kINFO << hLine << Endl; - Log() << kINFO << "DataSet MVA " << Endl; - Log() << kINFO << "Name: Method: ROC-integ" << Endl; - -// Log() << kDEBUG << 
"DataSet MVA Signal efficiency at bkg eff.(error): | Sepa- Signifi- " << Endl; -// Log() << kDEBUG << "Name: Method: @B=0.01 @B=0.10 @B=0.30 ROC-integ ROCCurve| ration: cance: " << Endl; - Log() << kDEBUG << hLine << Endl; - for (Int_t k=0; k<2; k++) { - if (k == 1 && nmeth_used[k] > 0) { + } else { + // Binary classification + if (fROC) { + Log().EnableOutput(); + gConfig().SetSilent(kFALSE); + Log() << Endl; + TString hLine = "------------------------------------------------------------------------------------------" + "-------------------------"; + Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl; Log() << kINFO << hLine << Endl; - Log() << kINFO << "Input Variables: " << Endl << hLine << Endl; - } - for (Int_t i=0; i(GetMethod(itrMap->first,mname[k][i])); - if(theMethod==0) continue; - TMVA::Results *results=theMethod->Data()->GetResults(mname[k][i],Types::kTesting,Types::kClassification); - std::vector *mvaRes = dynamic_cast(results)->GetValueVector(); - std::vector *mvaResType = dynamic_cast(results)->GetValueVectorTypes(); - Double_t fROCalcValue = 0; - TMVA::ROCCurve *fROCCurve = nullptr; - if (mvaResType->size() != 0) { - fROCCurve = new TMVA::ROCCurve(*mvaRes, *mvaResType); - fROCalcValue = fROCCurve->GetROCIntegral(); - } - - if (sep[k][i] < 0 || sig[k][i] < 0) { - // cannot compute separation/significance -> no MVA (usually for Cuts) - Log() << kINFO << Form("%-13s %-15s: %#1.3f", - itrMap->first.Data(), - (const char*)mname[k][i], - effArea[k][i]) << Endl; - -// Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f %#1.3f | -- --", -// itrMap->first.Data(), -// (const char*)mname[k][i], -// eff01[k][i], Int_t(1000*eff01err[k][i]), -// eff10[k][i], Int_t(1000*eff10err[k][i]), -// eff30[k][i], Int_t(1000*eff30err[k][i]), -// effArea[k][i],fROCalcValue) << Endl; - } - else { - Log() << kINFO << Form("%-13s %-15s: %#1.3f", - itrMap->first.Data(), - (const char*)mname[k][i], - 
fROCalcValue) << Endl; -// Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f %#1.3f | %#1.3f %#1.3f", -// itrMap->first.Data(), -// (const char*)mname[k][i], -// eff01[k][i], Int_t(1000*eff01err[k][i]), -// eff10[k][i], Int_t(1000*eff10err[k][i]), -// eff30[k][i], Int_t(1000*eff30err[k][i]), -// effArea[k][i],fROCalcValue, -// sep[k][i], sig[k][i]) << Endl; - } - if (fROCCurve) delete fROCCurve; - } - } - Log() << kINFO << hLine << Endl; - Log() << kINFO << Endl; - Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl; - Log() << kINFO << hLine << Endl; - Log() << kINFO << "DataSet MVA Signal efficiency: from test sample (from training sample) " << Endl; - Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 " << Endl; - Log() << kINFO << hLine << Endl; - for (Int_t k=0; k<2; k++) { - if (k == 1 && nmeth_used[k] > 0) { + Log() << kINFO << "DataSet MVA " << Endl; + Log() << kINFO << "Name: Method: ROC-integ" << Endl; + + // Log() << kDEBUG << "DataSet MVA Signal efficiency at bkg eff.(error): + // | Sepa- Signifi- " << Endl; Log() << kDEBUG << "Name: Method: @B=0.01 + // @B=0.10 @B=0.30 ROC-integ ROCCurve| ration: cance: " << Endl; + Log() << kDEBUG << hLine << Endl; + for (Int_t k = 0; k < 2; k++) { + if (k == 1 && nmeth_used[k] > 0) { + Log() << kINFO << hLine << Endl; + Log() << kINFO << "Input Variables: " << Endl << hLine << Endl; + } + for (Int_t i = 0; i < nmeth_used[k]; i++) { + if (k == 1) mname[k][i].ReplaceAll("Variable_", ""); + + MethodBase *theMethod = dynamic_cast(GetMethod(itrMap->first, mname[k][i])); + if (theMethod == 0) continue; + TMVA::Results *results = + theMethod->Data()->GetResults(mname[k][i], Types::kTesting, Types::kClassification); + std::vector *mvaRes = dynamic_cast(results)->GetValueVector(); + std::vector *mvaResType = + dynamic_cast(results)->GetValueVectorTypes(); + Double_t fROCalcValue = 0; + TMVA::ROCCurve *fROCCurve = nullptr; + if 
(mvaResType->size() != 0) { + fROCCurve = new TMVA::ROCCurve(*mvaRes, *mvaResType); + fROCalcValue = fROCCurve->GetROCIntegral(); + } + + if (sep[k][i] < 0 || sig[k][i] < 0) { + // cannot compute separation/significance -> no MVA (usually for Cuts) + Log() << kINFO + << Form("%-13s %-15s: %#1.3f", itrMap->first.Data(), (const char *)mname[k][i], + effArea[k][i]) + << Endl; + + // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) + // %#1.3f %#1.3f | -- --", + // itrMap->first.Data(), + // (const char*)mname[k][i], + // eff01[k][i], Int_t(1000*eff01err[k][i]), + // eff10[k][i], Int_t(1000*eff10err[k][i]), + // eff30[k][i], Int_t(1000*eff30err[k][i]), + // effArea[k][i],fROCalcValue) << Endl; + } else { + Log() << kINFO + << Form("%-13s %-15s: %#1.3f", itrMap->first.Data(), (const char *)mname[k][i], fROCalcValue) + << Endl; + // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) + // %#1.3f %#1.3f | %#1.3f %#1.3f", + // itrMap->first.Data(), + // (const char*)mname[k][i], + // eff01[k][i], Int_t(1000*eff01err[k][i]), + // eff10[k][i], Int_t(1000*eff10err[k][i]), + // eff30[k][i], Int_t(1000*eff30err[k][i]), + // effArea[k][i],fROCalcValue, + // sep[k][i], sig[k][i]) << Endl; + } + if (fROCCurve) delete fROCCurve; + } + } Log() << kINFO << hLine << Endl; - Log() << kINFO << "Input Variables: " << Endl << hLine << Endl; - } - for (Int_t i=0; i((*methods)[i]); - if(theMethod==0) continue; - - Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)", - theMethod->fDataSetInfo.GetName(), - (const char*)mname[k][i], - eff01[k][i],trainEff01[k][i], - eff10[k][i],trainEff10[k][i], - eff30[k][i],trainEff30[k][i]) << Endl; - } - } - Log() << kINFO << hLine << Endl; - Log() << kINFO << Endl; + Log() << kINFO << Endl; + Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl; + Log() << kINFO << hLine << Endl; + Log() << kINFO + << "DataSet MVA Signal 
efficiency: from test sample (from training sample) " + << Endl; + Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 " + << Endl; + Log() << kINFO << hLine << Endl; + for (Int_t k = 0; k < 2; k++) { + if (k == 1 && nmeth_used[k] > 0) { + Log() << kINFO << hLine << Endl; + Log() << kINFO << "Input Variables: " << Endl << hLine << Endl; + } + for (Int_t i = 0; i < nmeth_used[k]; i++) { + if (k == 1) mname[k][i].ReplaceAll("Variable_", ""); + MethodBase *theMethod = dynamic_cast((*methods)[i]); + if (theMethod == 0) continue; + + Log() << kINFO + << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)", + theMethod->fDataSetInfo.GetName(), (const char *)mname[k][i], eff01[k][i], + trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i]) + << Endl; + } + } + Log() << kINFO << hLine << Endl; + Log() << kINFO << Endl; - if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); - }//end fROC + if (gTools().CheckForSilentOption(GetOptions())) Log().InhibitOutput(); + } // end fROC } if(!IsSilentFile()) { diff --git a/tmva/tmva/src/MethodBase.cxx b/tmva/tmva/src/MethodBase.cxx index 1355f11583bee..faa5ee317e9bd 100644 --- a/tmva/tmva/src/MethodBase.cxx +++ b/tmva/tmva/src/MethodBase.cxx @@ -821,6 +821,7 @@ void TMVA::MethodBase::AddMulticlassOutput(Types::ETreeType type) TString histNamePrefix(GetTestvarName()); histNamePrefix += (type==Types::kTraining?"_Train":"_Test"); resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH ); + resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix); } //////////////////////////////////////////////////////////////////////////////// diff --git a/tmva/tmva/src/ResultsMulticlass.cxx b/tmva/tmva/src/ResultsMulticlass.cxx index bc4787db1db36..bb838003cce93 100644 --- a/tmva/tmva/src/ResultsMulticlass.cxx +++ b/tmva/tmva/src/ResultsMulticlass.cxx @@ -40,9 +40,11 @@ Class which takes the results of a multiclass classification #include 
"TMVA/GeneticFitter.h" #include "TMVA/MsgLogger.h" #include "TMVA/Results.h" +#include "TMVA/ROCCurve.h" #include "TMVA/Tools.h" #include "TMVA/Types.h" +#include "TGraph.h" #include "TH1F.h" #include
@@ -176,6 +178,77 @@ std::vector TMVA::ResultsMulticlass::GetBestMultiClassCuts(UInt_t targ return result; }
+////////////////////////////////////////////////////////////////////////////////
+/// Create performance graphs for this classifier in a multiclass setting.
+/// Requires that the method has already been evaluated (that a resultset
+/// already exists.)
+///
+/// For every class a one-vs-rest ROC curve is computed and stored as a TGraph
+/// named "<prefix>_rejBvsS_<classname>".
+///
+/// Currently uses the new way of calculating ROC Curves. If anything looks
+/// fishy, please contact the ROOT TMVA team.
+///
+/// \param[in] prefix Prefix used for the name and title of every stored graph.
+///
+
+void TMVA::ResultsMulticlass::CreateMulticlassPerformanceHistos(TString prefix)
+{
+   DataSet *ds = GetDataSet();
+   ds->SetCurrentType(GetTreeType());
+   const DataSetInfo *dsi = GetDataSetInfo();
+
+   UInt_t numClasses = dsi->GetNClasses();
+
+   std::vector<std::vector<Float_t>> *rawMvaRes = GetValueVector();
+
+   // Bind by reference; the events are only read here, so no copy is needed.
+   const auto &eventCollection = ds->GetEventCollection();
+
+   // The event weights are the same for every class: collect them only once.
+   std::vector<Float_t> mvaResWeights;
+   mvaResWeights.reserve(eventCollection.size());
+   for (auto ev : eventCollection) {
+      mvaResWeights.push_back(ev->GetWeight());
+   }
+
+   for (size_t iClass = 0; iClass < numClasses; ++iClass) {
+      // Format data
+      // TODO: Replace with calls to GetMvaValuesPerClass
+      std::vector<Float_t> mvaRes;
+      std::vector<Bool_t> mvaResTypes;
+
+      // Vector transpose due to values being stored as
+      //    [ [0, 1, 2], [0, 1, 2], ... ]
+      // in ResultsMulticlass::GetValueVector.
+      mvaRes.reserve(rawMvaRes->size());
+      for (const auto &item : *rawMvaRes) {
+         mvaRes.push_back(item[iClass]);
+      }
+
+      // An event counts as signal for this curve iff it belongs to class iClass.
+      mvaResTypes.reserve(eventCollection.size());
+      for (auto ev : eventCollection) {
+         mvaResTypes.push_back(ev->GetClass() == iClass);
+      }
+
+      // Get ROC Curve. The ROCCurve object is only needed while its graph is
+      // copied, so it lives on the stack instead of being new'ed and deleted.
+      ROCCurve roc(mvaRes, mvaResTypes, mvaResWeights);
+      TGraph *rocGraph = new TGraph(*(roc.GetROCCurve()));
+
+      // Style ROC Curve
+      TString className = dsi->GetClassInfo(iClass)->GetName();
+      TString name = Form("%s_rejBvsS_%s", prefix.Data(), className.Data());
+      TString title = Form("%s_%s", prefix.Data(), className.Data());
+      rocGraph->SetName(name);
+      rocGraph->SetTitle(title);
+
+      // Store ROC Curve
+      Store(rocGraph);
+   }
+}
+
 //////////////////////////////////////////////////////////////////////////////// /// this function fills the mva response histos for multiclass classification
diff --git a/tmva/tmvagui/CMakeLists.txt b/tmva/tmvagui/CMakeLists.txt index fd1b3f1a428bb..9540cc0f8d919 100644 --- a/tmva/tmvagui/CMakeLists.txt +++ b/tmva/tmvagui/CMakeLists.txt @@ -12,7 +12,7 @@ if(NOT CMAKE_PROJECT_NAME STREQUAL ROOT) endif() set(headers1 annconvergencetest.h deviations.h mvaeffs.h PlotFoams.h TMVAGui.h - BDTControlPlots.h correlationscatters.h efficiencies.h mvas.h probas.h + BDTControlPlots.h correlationscatters.h efficiencies.h efficienciesMulticlass.h mvas.h probas.h BDT.h correlationscattersMultiClass.h likelihoodrefs.h mvasMulticlass.h regression_averagedevs.h TMVAMultiClassGui.h BDT_Reg.h correlations.h mvaweights.h rulevisCorr.h TMVARegGui.h BoostControlPlots.h correlationsMultiClass.h network.h rulevis.h variables.h diff --git a/tmva/tmvagui/Module.mk b/tmva/tmvagui/Module.mk index 368b6a8c6c0d3..ba20261d70754 100644 --- a/tmva/tmvagui/Module.mk +++ b/tmva/tmvagui/Module.mk @@ -20,7 +20,7 @@ TMVAGUIDO :=
$(TMVAGUIDS:.cxx=.o) TMVAGUIDH := $(TMVAGUIDS:.cxx=.h) TMVAGUIH1 := annconvergencetest.h deviations.h mvaeffs.h PlotFoams.h TMVAGui.h\ - BDTControlPlots.h correlationscatters.h efficiencies.h mvas.h probas.h \ + BDTControlPlots.h correlationscatters.h efficiencies.h efficienciesMulticlass.h mvas.h probas.h \ BDT.h correlationscattersMultiClass.h likelihoodrefs.h mvasMulticlass.h regression_averagedevs.h TMVAMultiClassGui.h\ BDT_Reg.h correlations.h mvaweights.h rulevisCorr.h TMVARegGui.h\ BoostControlPlots.h correlationsMultiClass.h network.h rulevis.h variables.h\ diff --git a/tmva/tmvagui/inc/LinkDef.h b/tmva/tmvagui/inc/LinkDef.h index ac5e11fd8875b..d150602d66630 100644 --- a/tmva/tmvagui/inc/LinkDef.h +++ b/tmva/tmvagui/inc/LinkDef.h @@ -23,7 +23,8 @@ #pragma link C++ function TMVA::CorrGui; #pragma link C++ function TMVA::CorrGuiMultiClass; #pragma link C++ function TMVA::deviations; -#pragma link C++ function TMVA::efficiencies; +#pragma link C++ function TMVA::efficiencies; +#pragma link C++ function TMVA::efficienciesMulticlass; #pragma link C++ function TMVA::likelihoodrefs; #pragma link C++ function TMVA::MovieMaker; #pragma link C++ defined_in "TMVA/mvaeffs.h"; diff --git a/tmva/tmvagui/inc/TMVA/efficienciesMulticlass.h b/tmva/tmvagui/inc/TMVA/efficienciesMulticlass.h new file mode 100644 index 0000000000000..3cfe9e4fd61a1 --- /dev/null +++ b/tmva/tmvagui/inc/TMVA/efficienciesMulticlass.h @@ -0,0 +1,25 @@ +#ifndef efficienciesMulticlass__HH +#define efficienciesMulticlass__HH + +#include "tmvaglob.h" + +class TCanvas; +class TDirectory; +class TFile; +class TGraph; +class TString; + +namespace TMVA { + +enum class EEfficiencyPlotType { kEffBvsEffS, kRejBvsEffS }; + +void efficienciesMulticlass(TString dataset, TString filename_input = "TMVAMulticlass.root", + EEfficiencyPlotType plotType = EEfficiencyPlotType::kRejBvsEffS, + Bool_t useTMVAStyle = kTRUE); + +void plotEfficienciesMulticlass(EEfficiencyPlotType plotType = EEfficiencyPlotType::kRejBvsEffS, 
+ TDirectory *BinDir = 0); + +} // namespace TMVA + +#endif diff --git a/tmva/tmvagui/src/TMVAMultiClassGui.cxx b/tmva/tmvagui/src/TMVAMultiClassGui.cxx index fd22f3ba423f9..2642ec18b2054 100644 --- a/tmva/tmvagui/src/TMVAMultiClassGui.cxx +++ b/tmva/tmvagui/src/TMVAMultiClassGui.cxx @@ -189,63 +189,63 @@ void TMVA::TMVAMultiClassGui(const char* fName ,TString dataset) buttonType, defaultRequiredClassifier ); /* title = Form( "(%ic) Classifier Probability Distributions (test sample)", ic ); - MultiClassActionButton( cbar, + MultiClassActionButton( cbar, Form( "(%ic) Classifier Probability Distributions (test sample)", ic ), Form( "TMVA::mvas(\"%s\",TMVA::kProbaType)", fName ), "Plots the probability of each classifier for the test data (macro mvas(...,1))", buttonType, defaultRequiredClassifier ); - + title =Form( "(%id) Classifier Rarity Distributions (test sample)", ic ); - MultiClassActionButton( cbar, + MultiClassActionButton( cbar, Form( "(%id) Classifier Rarity Distributions (test sample)", ic ), Form( "TMVA::mvas(\"%s\",TMVA::kRarityType)", fName ), - "Plots the Rarity of each classifier for the test data (macro mvas(...,2)) - background distribution should be uniform", - buttonType, defaultRequiredClassifier ); - - + "Plots the Rarity of each classifier for the test data (macro mvas(...,2)) - background distribution should be + uniform", buttonType, defaultRequiredClassifier ); + + title =Form( "(%ia) Classifier Cut Efficiencies", ++ic ); - MultiClassActionButton( cbar, + MultiClassActionButton( cbar, title, Form( "TMVA::mvaeffs(\"%s\")", fName ), "Plots signal and background efficiencies versus cut on classifier output (macro mvaeffs.cxx)", buttonType, defaultRequiredClassifier ); + */ - title = Form( "(%ib) Classifier Background Rejection vs Signal Efficiency (ROC curve)", ic ); - MultiClassActionButton( cbar, - title, - Form( "TMVA::efficiencies(\"%s\")", fName ), - "Plots background rejection vs signal efficiencies (macro efficiencies.cxx) [\"ROC\" 
stands for \"Receiver Operation Characteristics\"]", - buttonType, defaultRequiredClassifier ); - - - title = Form( "(%i) Parallel Coordinates (requires ROOT-version >= 5.17)", ++ic ); - MultiClassActionButton( cbar, - title, - Form( "TMVA::paracoor(\"%s\")", fName ), - "Plots parallel coordinates for classifiers and input variables (macro paracoor.cxx, requires ROOT >= 5.17)", - buttonType, defaultRequiredClassifier ); + title = Form("(%i) Classifier Background Rejection vs Signal Efficiency (ROC curve)", ++ic); + MultiClassActionButton(cbar, title, Form("TMVA::efficienciesMulticlass(\"%s\", \"%s\")", dataset.Data(), fName), + "Plots background rejection vs signal efficiencies (macro efficiencies.cxx) [\"ROC\" stands " + "for \"Receiver Operation Characteristics\"]", + buttonType, defaultRequiredClassifier); - // parallel coordinates only exist since ROOT 5.17 - #if ROOT_VERSION_CODE < ROOT_VERSION(5,17,0) - TMVAMultiClassGui_inactiveButtons.push_back( title ); - #endif - - - title =Form( "(%i) PDFs of Classifiers (requires \"CreateMVAPdfs\" option set)", ++ic ); - MultiClassActionButton( cbar, - title, - Form( "TMVA::probas(\"%s\")", fName ), - "Plots the PDFs of the classifier output distributions for signal and background - if requested (macro probas.cxx)", - buttonType, defaultRequiredClassifier ); + /* + title = Form( "(%i) Parallel Coordinates (requires ROOT-version >= 5.17)", ++ic ); + MultiClassActionButton( cbar, + title, + Form( "TMVA::paracoor(\"%s\")", fName ), + "Plots parallel coordinates for classifiers and input variables (macro paracoor.cxx, requires ROOT >= 5.17)", + buttonType, defaultRequiredClassifier ); + + // parallel coordinates only exist since ROOT 5.17 + #if ROOT_VERSION_CODE < ROOT_VERSION(5,17,0) + TMVAMultiClassGui_inactiveButtons.push_back( title ); + #endif + + + title =Form( "(%i) PDFs of Classifiers (requires \"CreateMVAPdfs\" option set)", ++ic ); + MultiClassActionButton( cbar, + title, + Form( "TMVA::probas(\"%s\")", fName ), + 
"Plots the PDFs of the classifier output distributions for signal and background - if requested (macro probas.cxx)", + buttonType, defaultRequiredClassifier ); + + title = Form( "(%i) Likelihood Reference Distributiuons", ++ic); + MultiClassActionButton( cbar, + title, + Form( "TMVA::likelihoodrefs(\"%s\")", fName ), + "Plots to verify the likelihood reference distributions (macro likelihoodrefs.cxx)", + buttonType, "Likelihood" ); + */ - title = Form( "(%i) Likelihood Reference Distributiuons", ++ic); - MultiClassActionButton( cbar, - title, - Form( "TMVA::likelihoodrefs(\"%s\")", fName ), - "Plots to verify the likelihood reference distributions (macro likelihoodrefs.cxx)", - buttonType, "Likelihood" ); - */ - title = Form( "(%ia) Network Architecture (MLP)", ++ic ); TString call = Form( "TMVA::network(\"%s\",\"%s\")",dataset.Data() , fName ); MultiClassActionButton( cbar,
diff --git a/tmva/tmvagui/src/efficienciesMulticlass.cxx b/tmva/tmvagui/src/efficienciesMulticlass.cxx
new file mode 100644
index 0000000000000..483c0214b31ee
--- /dev/null
+++ b/tmva/tmvagui/src/efficienciesMulticlass.cxx
@@ -0,0 +1,322 @@
+// @(#)Root/tmva $Id$
+// Author: Kim Albertsson
+/**********************************************************************************
+ * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
+ * Package: TMVAGUI *
+ * Web : http://tmva.sourceforge.net *
+ * *
+ * Description: *
+ * Implementation (see header for description) *
+ * *
+ * Authors : *
+ * Kim Albertsson - LTU & CERN *
+ * *
+ * Copyright (c) 2005-2017: *
+ * CERN, Switzerland *
+ * LTU, Sweden *
+ * *
+ * Redistribution and use in source and binary forms, with or without *
+ * modification, are permitted according to the terms listed in LICENSE *
+ * (http://tmva.sourceforge.net/LICENSE) *
+ **********************************************************************************/
+
+#include "TMVA/efficienciesMulticlass.h"
+
+#include "TCanvas.h"
+#include "TDirectory.h"
+#include "TFile.h"
+#include "TGraph.h"
+#include "TH2F.h"
+#include "TIterator.h"
+#include "TKey.h"
+#include "TLegend.h"
+#include "TList.h"
+#include "TMath.h"
+
+#include <iostream>
+#include <map>
+
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Note: This file assumes a certain structure on the input file. The structure
+/// is as follows:
+///
+/// - dataset (TDirectory)
+///   - ... some variables, plots ...
+///   - Method_XXX (TDirectory)
+///     + XXX (TDirectory)
+///       * ... some plots ...
+///       * MVA_Method_XXX_Test_#classname#
+///       * ... some plots ...
+///   - Method_YYY (TDirectory)
+///     + YYY (TDirectory)
+///       * ... some plots ...
+///       * MVA_Method_YYY_Test_#classname#
+///       * ... some plots ...
+///   - TestTree (TTree)
+///     + ... data...
+///   - TrainTree (TTree)
+///     + ... data...
+///
+/// Keeping this in mind makes the main loop in plotEfficienciesMulticlass easier
+/// to follow :)
+///
+
+////////////////////////////////////////////////////////////////////////////////
+/// Wrapper for a canvas that also keeps track of color assignments for added
+/// subgraphs.
+///
+/// The canvas and the legend are heap-allocated in the constructor; this class
+/// declares no destructor and never deletes them.
+
+class EfficiencyPlotWrapper {
+
+public:
+   TCanvas *fCanvas; ///< Canvas the ROC curves of one class are drawn on
+   TLegend *fLegend; ///< Legend listing the drawn methods
+
+   TString fClassname; ///< Name of the class this plot is for
+   Int_t fColor;       ///< Line color assigned to the next added graph
+
+   UInt_t fNumMethods; ///< Number of graphs added so far
+
+   explicit EfficiencyPlotWrapper(TString classname);
+   Int_t addGraph(TGraph *graph);
+
+   void addLegendEntry(TString methodTitle, TGraph *graph);
+
+private:
+   Float_t fx0L; ///< Legend left edge (x1)
+   Float_t fdxL; ///< Legend width
+   Float_t fy0H; ///< Legend top edge at creation (y2)
+   Float_t fdyH; ///< Legend height at creation
+
+   TCanvas *newEfficiencyCanvas(TString className);
+   TLegend *newEfficiencyLegend();
+};
+
+////////////////////////////////////////////////////////////////////////////////
+/// Constructs a new canvas + auxiliary data for showing an efficiency plot.
+///
+/// @param classname Name of the class the plot is for; used to name and title
+///                  the canvas and its frame.
+
+EfficiencyPlotWrapper::EfficiencyPlotWrapper(TString classname)
+{
+   // Legend extents (init before calling newEfficiencyLegend...)
+   // fx0L and fy0H are assigned twice: their first values only feed the
+   // computation of fdxL and of the final fy0H.
+   fx0L = 0.107;
+   fy0H = 0.899;
+   fdxL = 0.457 - fx0L;
+   fdyH = 0.22;
+   fx0L = 0.15;
+   fy0H = 1 - fy0H + fdyH + 0.07;
+
+   fColor = 1;
+   fNumMethods = 0;
+
+   fClassname = classname;
+   fCanvas = newEfficiencyCanvas(classname);
+   fLegend = newEfficiencyLegend();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Adds a new graph to the plot. The added graph should contain a single ROC
+/// curve.
+///
+/// @param graph Graph to style and draw on this wrapper's canvas.
+/// @return The color index that the next added graph will be drawn with.
+
+Int_t EfficiencyPlotWrapper::addGraph(TGraph *graph)
+{
+   graph->SetLineWidth(3);
+   graph->SetLineColor(fColor);
+   // Advance to the next color, stepping over the indices listed below.
+   fColor++;
+   if (fColor == 5 || fColor == 10 || fColor == 11) {
+      fColor++;
+   }
+
+   fCanvas->cd();
+   graph->Draw("");
+   fCanvas->Update();
+
+   ++fNumMethods;
+
+   return fColor;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Adds a legend entry for a graph and resizes the legend.
+///
+/// WARNING: addGraph sets the line color of the graph and updates fNumMethods,
+/// which the legend height is derived from, thus the correct call ordering is:
+///    plotWrapper->addGraph(...);
+///    plotWrapper->addLegendEntry(...);
+///
+/// @param methodTitle Label shown in the legend for the graph.
+/// @param graph Graph the legend entry refers to.
+
+void EfficiencyPlotWrapper::addLegendEntry(TString methodTitle, TGraph *graph)
+{
+   fLegend->AddEntry(graph, methodTitle, "l");
+
+   // The legend height follows the number of added methods, capped at 10.
+   Float_t dyH_local = fdyH * (Float_t(TMath::Min((UInt_t)10, fNumMethods) - 3.0) / 4.0);
+   fLegend->SetY2(fy0H + dyH_local);
+
+   fLegend->Paint();
+   fCanvas->Update();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Helper to create new Canvas
+///
+/// @param className Name of the class; used in the canvas and frame names/titles.
+/// @return The newly created canvas, with an empty frame already drawn on it.
+
+TCanvas *EfficiencyPlotWrapper::newEfficiencyCanvas(TString className)
+{
+   TString canvas_name = Form("%s_%s", className.Data(), "canvas");
+   TString canvas_title = Form("ROC Curve %s", className.Data());
+   TCanvas *c = new TCanvas(canvas_name, canvas_title, 200, 0, 650, 500);
+   // global style settings
+   c->SetGrid();
+   c->SetTicks();
+
+   // Frame
+   TString xtit = "Signal Efficiency";
+   TString ytit = "Background Rejection (1 - eff)";
+   TString ftit = Form("Background Rejection vs Signal Efficiency %s", className.Data());
+   Double_t x1 = 0.0;
+   Double_t x2 = 1.0;
+   Double_t y1 = 0.0;
+   Double_t y2 = 1.0;
+
+   TH2F *frame = new TH2F(Form("%s_%s", className.Data(), "frame"), ftit, 500, x1, x2, 500, y1, y2);
+   frame->GetXaxis()->SetTitle(xtit);
+   frame->GetYaxis()->SetTitle(ytit);
+   TMVA::TMVAGlob::SetFrameStyle(frame, 1.0);
+   frame->DrawClone();
+
+   return c;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Helper to create new legend.
+///
+/// @return The newly created legend, already drawn on the current pad.
+
+TLegend *EfficiencyPlotWrapper::newEfficiencyLegend()
+{
+   TLegend *legend = new TLegend(fx0L, fy0H - fdyH, fx0L + fdxL, fy0H);
+   // legend->SetTextSize( 0.05 );
+   legend->SetHeader("MVA Method:");
+   legend->SetMargin(0.4);
+   legend->Draw("");
+
+   return legend;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Entry point. Called from the TMVAMulticlassGui Buttons
+///
+/// @param dataset Dataset to operate on. Should be created by the TMVA Multiclass Factory.
+/// @param filename_input Name of the input file produced by a TMVA Multiclass Factory.
+/// @param plotType Specifies what kind of ROC curve to draw. Currently only rejB vs. effS is supported.
+/// @param useTMVAStyle Forwarded to TMVAGlob::Initialize.
+
+void TMVA::efficienciesMulticlass(TString dataset, TString filename_input, EEfficiencyPlotType plotType,
+                                  Bool_t useTMVAStyle)
+{
+   // set style and remove existing canvas'
+   TMVAGlob::Initialize(useTMVAStyle);
+
+   // checks if filename_input is already open, and if not opens one
+   TFile *file = TMVAGlob::OpenFile(filename_input);
+   if (file == nullptr) {
+      std::cout << "ERROR: filename \"" << filename_input << "\" is not found." << std::endl;
+      return;
+   }
+
+   plotEfficienciesMulticlass(plotType, file->GetDirectory(dataset.Data()));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Work horse function. Will operate on the currently open file (opened by
+/// efficienciesMulticlass).
+///
+/// Every ROC graph whose name does not contain "Train" is drawn on a canvas
+/// dedicated to its class; the canvases are created on demand.
+///
+/// @param plotType See efficienciesMulticlass.
+/// @param binDir Directory in the file on which to operate.
+
+void TMVA::plotEfficienciesMulticlass(EEfficiencyPlotType plotType, TDirectory *binDir)
+{
+   // The current multiclass version implements only type 2 - rejB vs effS
+   if (plotType != EEfficiencyPlotType::kRejBvsEffS) {
+      std::cout << "Error: For multiclass, only rejB vs effS is currently implemented." << std::endl;
+      return;
+   }
+
+   TString methodPrefix = "MVA_";
+   TString graphNameRef = "rejBvsS";
+   std::map<TString, EfficiencyPlotWrapper *> classCanvasMap;
+
+   TList methods;
+   UInt_t nm = TMVAGlob::GetListOfMethods(methods, binDir);
+   if (nm == 0) {
+      std::cout << "ups .. no methods found in to plot ROC curve for ... give up" << std::endl;
+      return;
+   }
+   TIter next(&methods);
+
+   // Loop over all method categories
+   TKey *key;
+   while ((key = (TKey *)next())) {
+      TDirectory *mDir = (TDirectory *)key->ReadObj();
+      TList titles;
+      TMVAGlob::GetListOfTitles(mDir, titles);
+
+      // Loop over each method within a category
+      TIter nextTitle(&titles);
+      TKey *titkey;
+      TDirectory *titDir;
+      while ((titkey = TMVAGlob::NextKey(nextTitle, "TDirectory"))) {
+         titDir = (TDirectory *)titkey->ReadObj();
+         TString methodTitle;
+         TMVAGlob::GetMethodTitle(methodTitle, titDir);
+
+         // Loop through all plots for the method
+         TIter nextKey(titDir->GetListOfKeys());
+         TKey *hkey2;
+         while ((hkey2 = TMVAGlob::NextKey(nextKey, "TGraph"))) {
+
+            TGraph *h = (TGraph *)hkey2->ReadObj();
+            TString hname = h->GetName();
+            if (hname.Contains(graphNameRef) && hname.BeginsWith(methodPrefix) && !hname.Contains("Train")) {
+
+               // Extract classname from plot name
+               UInt_t index = hname.Last('_');
+               TString classname = hname(index + 1, hname.Length() - (index + 1));
+
+               // Creating the class map lazily, TMVAGlob::GetClassNames is
+               // bugged and reports more classes than there are. This method
+               // does not.
+               // Look the wrapper up with find(): a missing key simply means this
+               // is the first graph seen for the class.
+               auto wrapperIt = classCanvasMap.find(classname);
+               if (wrapperIt == classCanvasMap.end()) {
+                  wrapperIt = classCanvasMap.emplace(classname, new EfficiencyPlotWrapper(classname)).first;
+               }
+               EfficiencyPlotWrapper *plotWrapper = wrapperIt->second;
+
+               plotWrapper->addGraph(h);
+               plotWrapper->addLegendEntry(methodTitle, h);
+            }
+         }
+      }
+   }
+}
\ No newline at end of file
From 98681886525c20dd2c0c0901b4a1154f76ae72f3 Mon Sep 17 00:00:00 2001 From: ashlaban Date: Fri, 12 May 2017 19:22:55 +0200 Subject: [PATCH 2/2] clang-format --- tmva/tmva/src/Factory.cxx | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tmva/tmva/src/Factory.cxx b/tmva/tmva/src/Factory.cxx index 0507a22f4f80e..a6692d664c40f 100644 --- a/tmva/tmva/src/Factory.cxx +++ b/tmva/tmva/src/Factory.cxx @@ -1729,9 +1729,8 @@ void TMVA::Factory::EvaluateAllMethods( void ) if (sep[k][i] < 0 || sig[k][i] < 0) { // cannot compute separation/significance -> no MVA (usually for Cuts) - Log() << kINFO - << Form("%-13s %-15s: %#1.3f", itrMap->first.Data(), (const char *)mname[k][i], - effArea[k][i]) + Log() << kINFO << Form("%-13s %-15s: %#1.3f", itrMap->first.Data(), (const char *)mname[k][i], + effArea[k][i]) << Endl; // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) @@ -1779,10 +1778,9 @@ void TMVA::Factory::EvaluateAllMethods( void ) MethodBase *theMethod = dynamic_cast((*methods)[i]); if (theMethod == 0) continue; - Log() << kINFO - << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)", - theMethod->fDataSetInfo.GetName(), (const char *)mname[k][i], eff01[k][i], - trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i]) + Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)", + theMethod->fDataSetInfo.GetName(), (const char *)mname[k][i], eff01[k][i], + trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i]) << Endl; } }