diff --git a/tmva/tmva/inc/TMVA/ResultsMulticlass.h b/tmva/tmva/inc/TMVA/ResultsMulticlass.h index 09697ca7360b0..f61f15699c240 100644 --- a/tmva/tmva/inc/TMVA/ResultsMulticlass.h +++ b/tmva/tmva/inc/TMVA/ResultsMulticlass.h @@ -76,6 +76,7 @@ namespace TMVA { std::vector& GetAchievableEff(){return fAchievableEff;} std::vector& GetAchievablePur(){return fAchievablePur;} // histogramming + void CreateMulticlassPerformanceHistos(TString prefix); void CreateMulticlassHistos( TString prefix, Int_t nbins, Int_t nbins_high); Double_t EstimatorFunction( std::vector & ); diff --git a/tmva/tmva/src/Factory.cxx b/tmva/tmva/src/Factory.cxx index f15399cf823d9..a6692d664c40f 100644 --- a/tmva/tmva/src/Factory.cxx +++ b/tmva/tmva/src/Factory.cxx @@ -1303,58 +1303,59 @@ void TMVA::Factory::EvaluateAllMethods( void ) theMethod->WriteEvaluationHistosToFile(Types::kTesting); theMethod->WriteEvaluationHistosToFile(Types::kTraining); } - } - else if (theMethod->DoMulticlass()) { - doMulticlass = kTRUE; - Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl; - if(!IsSilentFile()) - { - Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl; - theMethod->WriteEvaluationHistosToFile(Types::kTesting); - theMethod->WriteEvaluationHistosToFile(Types::kTraining); - } - theMethod->TestMulticlass(); - multiclass_testEff.push_back(theMethod->GetMulticlassEfficiency(multiclass_testPur)); - - nmeth_used[0]++; - mname[0].push_back( theMethod->GetMethodName() ); - } - else { - - Log() << kHEADER << "Evaluate classifier: " << theMethod->GetMethodName() << Endl << Endl; - isel = (theMethod->GetMethodTypeName().Contains("Variable")) ? 1 : 0; - - // perform the evaluation - theMethod->TestClassification(); - - - // evaluate the classifier - mname[isel].push_back( theMethod->GetMethodName() ); - sig[isel].push_back ( theMethod->GetSignificance() ); - sep[isel].push_back ( theMethod->GetSeparation() ); - roc[isel].push_back ( theMethod->GetROCIntegral() ); - - Double_t err; - eff01[isel].push_back( theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err) ); - eff01err[isel].push_back( err ); - eff10[isel].push_back( theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err) ); - eff10err[isel].push_back( err ); - eff30[isel].push_back( theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err) ); - eff30err[isel].push_back( err ); - effArea[isel].push_back( theMethod->GetEfficiency("", Types::kTesting, err) ); // computes the area (average) - - trainEff01[isel].push_back( theMethod->GetTrainingEfficiency("Efficiency:0.01") ); // the first pass takes longer - trainEff10[isel].push_back( theMethod->GetTrainingEfficiency("Efficiency:0.10") ); - trainEff30[isel].push_back( theMethod->GetTrainingEfficiency("Efficiency:0.30") ); - - nmeth_used[isel]++; + } else if (theMethod->DoMulticlass()) { + // ==================================================================== + // === Multiclass evaluation + // ==================================================================== + doMulticlass = kTRUE; + Log() << kINFO << "Evaluate multiclass classification method: " << theMethod->GetMethodName() << Endl; + + theMethod->TestMulticlass(); + multiclass_testEff.push_back(theMethod->GetMulticlassEfficiency(multiclass_testPur)); + + // FIXME: This code snippet is repeated in other branches + if (not IsSilentFile()) { + Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl; + theMethod->WriteEvaluationHistosToFile(Types::kTesting); + theMethod->WriteEvaluationHistosToFile(Types::kTraining); + } - if(!IsSilentFile()) - { - Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl; - theMethod->WriteEvaluationHistosToFile(Types::kTesting); - theMethod->WriteEvaluationHistosToFile(Types::kTraining); - } + nmeth_used[0]++; + mname[0].push_back(theMethod->GetMethodName()); + } else { + + Log() << kHEADER << "Evaluate classifier: " << theMethod->GetMethodName() << Endl << Endl; + isel = (theMethod->GetMethodTypeName().Contains("Variable")) ? 1 : 0; + + // perform the evaluation + theMethod->TestClassification(); + + // evaluate the classifier + mname[isel].push_back(theMethod->GetMethodName()); + sig[isel].push_back(theMethod->GetSignificance()); + sep[isel].push_back(theMethod->GetSeparation()); + roc[isel].push_back(theMethod->GetROCIntegral()); + + Double_t err; + eff01[isel].push_back(theMethod->GetEfficiency("Efficiency:0.01", Types::kTesting, err)); + eff01err[isel].push_back(err); + eff10[isel].push_back(theMethod->GetEfficiency("Efficiency:0.10", Types::kTesting, err)); + eff10err[isel].push_back(err); + eff30[isel].push_back(theMethod->GetEfficiency("Efficiency:0.30", Types::kTesting, err)); + eff30err[isel].push_back(err); + effArea[isel].push_back(theMethod->GetEfficiency("", Types::kTesting, err)); // computes the area (average) + + trainEff01[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.01")); // the first pass takes longer + trainEff10[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.10")); + trainEff30[isel].push_back(theMethod->GetTrainingEfficiency("Efficiency:0.30")); + + nmeth_used[isel]++; + + if (!IsSilentFile()) { + Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl; + theMethod->WriteEvaluationHistosToFile(Types::kTesting); + theMethod->WriteEvaluationHistosToFile(Types::kTraining); + } } } if (doRegression) { @@ -1687,104 +1688,107 @@ void TMVA::Factory::EvaluateAllMethods( void ) Log() << kINFO << hLine << Endl; Log() << kINFO << Endl; } - } - else { - if(fROC) - { - Log().EnableOutput(); - gConfig().SetSilent(kFALSE); - Log() << Endl; - TString hLine = "-------------------------------------------------------------------------------------------------------------------"; - Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl; - Log() << kINFO << hLine << Endl; - Log() << kINFO << "DataSet MVA " << Endl; - Log() << kINFO << "Name: Method: ROC-integ" << Endl; - -// Log() << kDEBUG << "DataSet MVA Signal efficiency at bkg eff.(error): | Sepa- Signifi- " << Endl; -// Log() << kDEBUG << "Name: Method: @B=0.01 @B=0.10 @B=0.30 ROC-integ ROCCurve| ration: cance: " << Endl; - Log() << kDEBUG << hLine << Endl; - for (Int_t k=0; k<2; k++) { - if (k == 1 && nmeth_used[k] > 0) { + } else { + // Binary classification + if (fROC) { + Log().EnableOutput(); + gConfig().SetSilent(kFALSE); + Log() << Endl; + TString hLine = "------------------------------------------------------------------------------------------" + "-------------------------"; + Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl; Log() << kINFO << hLine << Endl; - Log() << kINFO << "Input Variables: " << Endl << hLine << Endl; - } - for (Int_t i=0; i(GetMethod(itrMap->first,mname[k][i])); - if(theMethod==0) continue; - TMVA::Results *results=theMethod->Data()->GetResults(mname[k][i],Types::kTesting,Types::kClassification); - std::vector *mvaRes = dynamic_cast(results)->GetValueVector(); - std::vector *mvaResType = dynamic_cast(results)->GetValueVectorTypes(); - Double_t fROCalcValue = 0; - TMVA::ROCCurve *fROCCurve = nullptr; - if (mvaResType->size() != 0) { - fROCCurve = new TMVA::ROCCurve(*mvaRes, *mvaResType); - fROCalcValue = fROCCurve->GetROCIntegral(); - } - - if (sep[k][i] < 0 || sig[k][i] < 0) { - // cannot compute separation/significance -> no MVA (usually for Cuts) - Log() << kINFO << Form("%-13s %-15s: %#1.3f", - itrMap->first.Data(), - (const char*)mname[k][i], - effArea[k][i]) << Endl; - -// Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f %#1.3f | -- --", -// itrMap->first.Data(), -// (const char*)mname[k][i], -// eff01[k][i], Int_t(1000*eff01err[k][i]), -// eff10[k][i], Int_t(1000*eff10err[k][i]), -// eff30[k][i], Int_t(1000*eff30err[k][i]), -// effArea[k][i],fROCalcValue) << Endl; - } - else { - Log() << kINFO << Form("%-13s %-15s: %#1.3f", - itrMap->first.Data(), - (const char*)mname[k][i], - fROCalcValue) << Endl; -// Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) %#1.3f %#1.3f | %#1.3f %#1.3f", -// itrMap->first.Data(), -// (const char*)mname[k][i], -// eff01[k][i], Int_t(1000*eff01err[k][i]), -// eff10[k][i], Int_t(1000*eff10err[k][i]), -// eff30[k][i], Int_t(1000*eff30err[k][i]), -// effArea[k][i],fROCalcValue, -// sep[k][i], sig[k][i]) << Endl; - } - if (fROCCurve) delete fROCCurve; - } - } - Log() << kINFO << hLine << Endl; - Log() << kINFO << Endl; - Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl; - Log() << kINFO << hLine << Endl; - Log() << kINFO << "DataSet MVA Signal efficiency: from test sample (from training sample) " << Endl; - Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 " << Endl; - Log() << kINFO << hLine << Endl; - for (Int_t k=0; k<2; k++) { - if (k == 1 && nmeth_used[k] > 0) { + Log() << kINFO << "DataSet MVA " << Endl; + Log() << kINFO << "Name: Method: ROC-integ" << Endl; + + // Log() << kDEBUG << "DataSet MVA Signal efficiency at bkg eff.(error): + // | Sepa- Signifi- " << Endl; Log() << kDEBUG << "Name: Method: @B=0.01 + // @B=0.10 @B=0.30 ROC-integ ROCCurve| ration: cance: " << Endl; + Log() << kDEBUG << hLine << Endl; + for (Int_t k = 0; k < 2; k++) { + if (k == 1 && nmeth_used[k] > 0) { + Log() << kINFO << hLine << Endl; + Log() << kINFO << "Input Variables: " << Endl << hLine << Endl; + } + for (Int_t i = 0; i < nmeth_used[k]; i++) { + if (k == 1) mname[k][i].ReplaceAll("Variable_", ""); + + MethodBase *theMethod = dynamic_cast(GetMethod(itrMap->first, mname[k][i])); + if (theMethod == 0) continue; + TMVA::Results *results = + theMethod->Data()->GetResults(mname[k][i], Types::kTesting, Types::kClassification); + std::vector *mvaRes = dynamic_cast(results)->GetValueVector(); + std::vector *mvaResType = + dynamic_cast(results)->GetValueVectorTypes(); + Double_t fROCalcValue = 0; + TMVA::ROCCurve *fROCCurve = nullptr; + if (mvaResType->size() != 0) { + fROCCurve = new TMVA::ROCCurve(*mvaRes, *mvaResType); + fROCalcValue = fROCCurve->GetROCIntegral(); + } + + if (sep[k][i] < 0 || sig[k][i] < 0) { + // cannot compute separation/significance -> no MVA (usually for Cuts) + Log() << kINFO << Form("%-13s %-15s: %#1.3f", itrMap->first.Data(), (const char *)mname[k][i], + effArea[k][i]) + << Endl; + + // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) + // %#1.3f %#1.3f | -- --", + // itrMap->first.Data(), + // (const char*)mname[k][i], + // eff01[k][i], Int_t(1000*eff01err[k][i]), + // eff10[k][i], Int_t(1000*eff10err[k][i]), + // eff30[k][i], Int_t(1000*eff30err[k][i]), + // effArea[k][i],fROCalcValue) << Endl; + } else { + Log() << kINFO + << Form("%-13s %-15s: %#1.3f", itrMap->first.Data(), (const char *)mname[k][i], fROCalcValue) + << Endl; + // Log() << kDEBUG << Form("%-20s %-15s: %#1.3f(%02i) %#1.3f(%02i) %#1.3f(%02i) + // %#1.3f %#1.3f | %#1.3f %#1.3f", + // itrMap->first.Data(), + // (const char*)mname[k][i], + // eff01[k][i], Int_t(1000*eff01err[k][i]), + // eff10[k][i], Int_t(1000*eff10err[k][i]), + // eff30[k][i], Int_t(1000*eff30err[k][i]), + // effArea[k][i],fROCalcValue, + // sep[k][i], sig[k][i]) << Endl; + } + if (fROCCurve) delete fROCCurve; + } + } Log() << kINFO << hLine << Endl; - Log() << kINFO << "Input Variables: " << Endl << hLine << Endl; - } - for (Int_t i=0; i((*methods)[i]); - if(theMethod==0) continue; - - Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)", - theMethod->fDataSetInfo.GetName(), - (const char*)mname[k][i], - eff01[k][i],trainEff01[k][i], - eff10[k][i],trainEff10[k][i], - eff30[k][i],trainEff30[k][i]) << Endl; - } - } - Log() << kINFO << hLine << Endl; - Log() << kINFO << Endl; + Log() << kINFO << Endl; + Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl; + Log() << kINFO << hLine << Endl; + Log() << kINFO + << "DataSet MVA Signal efficiency: from test sample (from training sample) " + << Endl; + Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 " + << Endl; + Log() << kINFO << hLine << Endl; + for (Int_t k = 0; k < 2; k++) { + if (k == 1 && nmeth_used[k] > 0) { + Log() << kINFO << hLine << Endl; + Log() << kINFO << "Input Variables: " << Endl << hLine << Endl; + } + for (Int_t i = 0; i < nmeth_used[k]; i++) { + if (k == 1) mname[k][i].ReplaceAll("Variable_", ""); + MethodBase *theMethod = dynamic_cast((*methods)[i]); + if (theMethod == 0) continue; + + Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)", + theMethod->fDataSetInfo.GetName(), (const char *)mname[k][i], eff01[k][i], + trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i]) + << Endl; + } + } + Log() << kINFO << hLine << Endl; + Log() << kINFO << Endl; - if (gTools().CheckForSilentOption( GetOptions() )) Log().InhibitOutput(); - }//end fROC + if (gTools().CheckForSilentOption(GetOptions())) Log().InhibitOutput(); + } // end fROC } if(!IsSilentFile()) { diff --git a/tmva/tmva/src/MethodBase.cxx b/tmva/tmva/src/MethodBase.cxx index 1355f11583bee..faa5ee317e9bd 100644 --- a/tmva/tmva/src/MethodBase.cxx +++ b/tmva/tmva/src/MethodBase.cxx @@ -821,6 +821,7 @@ void TMVA::MethodBase::AddMulticlassOutput(Types::ETreeType type) TString histNamePrefix(GetTestvarName()); histNamePrefix += (type==Types::kTraining?"_Train":"_Test"); resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH ); + resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix); } //////////////////////////////////////////////////////////////////////////////// diff --git a/tmva/tmva/src/ResultsMulticlass.cxx b/tmva/tmva/src/ResultsMulticlass.cxx index bc4787db1db36..bb838003cce93 100644 --- a/tmva/tmva/src/ResultsMulticlass.cxx +++ b/tmva/tmva/src/ResultsMulticlass.cxx @@ -40,9 +40,11 @@ Class which takes the results of a multiclass classification #include "TMVA/GeneticFitter.h" #include "TMVA/MsgLogger.h" #include "TMVA/Results.h" +#include "TMVA/ROCCurve.h" #include "TMVA/Tools.h" #include "TMVA/Types.h" +#include "TGraph.h" #include "TH1F.h" #include @@ -176,6 +178,65 @@ std::vector TMVA::ResultsMulticlass::GetBestMultiClassCuts(UInt_t targ return result; } +//////////////////////////////////////////////////////////////////////////////// +/// Create performance graphs for this classifier a multiclass setting. +/// Requires that the method has already been evaluated (that a resultset +/// already exists.) +/// +/// Currently uses the new way of calculating ROC Curves. If anything looks +/// fishy, please contact the ROOT TMVA team. +/// + +void TMVA::ResultsMulticlass::CreateMulticlassPerformanceHistos(TString prefix) +{ + DataSet *ds = GetDataSet(); + ds->SetCurrentType(GetTreeType()); + const DataSetInfo *dsi = GetDataSetInfo(); + + UInt_t numClasses = dsi->GetNClasses(); + + std::vector> *rawMvaRes = GetValueVector(); + + for (size_t iClass = 0; iClass < numClasses; ++iClass) { + // Format data + // TODO: Replace with calls to GetMvaValuesPerClass + std::vector mvaRes; + std::vector mvaResTypes; + std::vector mvaResWeights; + + // Vector transpose due to values being stored as + // [ [0, 1, 2], [0, 1, 2], ... ] + // in ResultsMulticlass::GetValueVector. + mvaRes.reserve(rawMvaRes->size()); + for (auto item : *rawMvaRes) { + mvaRes.push_back(item[iClass]); + } + + auto eventCollection = ds->GetEventCollection(); + mvaResTypes.reserve(eventCollection.size()); + mvaResWeights.reserve(eventCollection.size()); + for (auto ev : eventCollection) { + mvaResTypes.push_back(ev->GetClass() == iClass); + mvaResWeights.push_back(ev->GetWeight()); + } + + // Get ROC Curve + ROCCurve *roc = new ROCCurve(mvaRes, mvaResTypes, mvaResWeights); + TGraph *rocGraph = new TGraph(*(roc->GetROCCurve())); + delete roc; + + // Style ROC Curve + TString className = dsi->GetClassInfo(iClass)->GetName(); + TString name = Form("%s_rejBvsS_%s", prefix.Data(), className.Data()); + TString title = Form("%s_%s", prefix.Data(), className.Data()); + rocGraph->SetName(name); + rocGraph->SetTitle(title); + + // Store ROC Curve + Store(rocGraph); + } +} + //////////////////////////////////////////////////////////////////////////////// /// this function fills the mva response histos for multiclass classification diff --git a/tmva/tmvagui/CMakeLists.txt b/tmva/tmvagui/CMakeLists.txt index fd1b3f1a428bb..9540cc0f8d919 100644 --- a/tmva/tmvagui/CMakeLists.txt +++ b/tmva/tmvagui/CMakeLists.txt @@ -12,7 +12,7 @@ if(NOT CMAKE_PROJECT_NAME STREQUAL ROOT) endif() set(headers1 annconvergencetest.h deviations.h mvaeffs.h PlotFoams.h TMVAGui.h - BDTControlPlots.h correlationscatters.h efficiencies.h mvas.h probas.h + BDTControlPlots.h correlationscatters.h efficiencies.h efficienciesMulticlass.h mvas.h probas.h BDT.h correlationscattersMultiClass.h likelihoodrefs.h mvasMulticlass.h regression_averagedevs.h TMVAMultiClassGui.h BDT_Reg.h correlations.h mvaweights.h rulevisCorr.h TMVARegGui.h BoostControlPlots.h correlationsMultiClass.h network.h rulevis.h variables.h diff --git a/tmva/tmvagui/Module.mk b/tmva/tmvagui/Module.mk index 368b6a8c6c0d3..ba20261d70754 100644 --- a/tmva/tmvagui/Module.mk +++ b/tmva/tmvagui/Module.mk @@ -20,7 +20,7 @@ TMVAGUIDO := $(TMVAGUIDS:.cxx=.o) TMVAGUIDH := $(TMVAGUIDS:.cxx=.h) TMVAGUIH1 := annconvergencetest.h deviations.h mvaeffs.h PlotFoams.h TMVAGui.h\ - BDTControlPlots.h correlationscatters.h efficiencies.h mvas.h probas.h \ + BDTControlPlots.h correlationscatters.h efficiencies.h efficienciesMulticlass.h mvas.h probas.h \ BDT.h correlationscattersMultiClass.h likelihoodrefs.h mvasMulticlass.h regression_averagedevs.h TMVAMultiClassGui.h\ BDT_Reg.h correlations.h mvaweights.h rulevisCorr.h TMVARegGui.h\ BoostControlPlots.h correlationsMultiClass.h network.h rulevis.h variables.h\ diff --git a/tmva/tmvagui/inc/LinkDef.h b/tmva/tmvagui/inc/LinkDef.h index ac5e11fd8875b..d150602d66630 100644 --- a/tmva/tmvagui/inc/LinkDef.h +++ b/tmva/tmvagui/inc/LinkDef.h @@ -23,7 +23,8 @@ #pragma link C++ function TMVA::CorrGui; #pragma link C++ function TMVA::CorrGuiMultiClass; #pragma link C++ function TMVA::deviations; -#pragma link C++ function TMVA::efficiencies; +#pragma link C++ function TMVA::efficiencies; +#pragma link C++ function TMVA::efficienciesMulticlass; #pragma link C++ function TMVA::likelihoodrefs; #pragma link C++ function TMVA::MovieMaker; #pragma link C++ defined_in "TMVA/mvaeffs.h"; diff --git a/tmva/tmvagui/inc/TMVA/efficienciesMulticlass.h b/tmva/tmvagui/inc/TMVA/efficienciesMulticlass.h new file mode 100644 index 0000000000000..3cfe9e4fd61a1 --- /dev/null +++ b/tmva/tmvagui/inc/TMVA/efficienciesMulticlass.h @@ -0,0 +1,25 @@ +#ifndef efficienciesMulticlass__HH +#define efficienciesMulticlass__HH + +#include "tmvaglob.h" + +class TCanvas; +class TDirectory; +class TFile; +class TGraph; +class TString; + +namespace TMVA { + +enum class EEfficiencyPlotType { kEffBvsEffS, kRejBvsEffS }; + +void efficienciesMulticlass(TString dataset, TString filename_input = "TMVAMulticlass.root", + EEfficiencyPlotType plotType = EEfficiencyPlotType::kRejBvsEffS, + Bool_t useTMVAStyle = kTRUE); + +void plotEfficienciesMulticlass(EEfficiencyPlotType plotType = EEfficiencyPlotType::kRejBvsEffS, + TDirectory *BinDir = 0); + +} // namespace TMVA + +#endif diff --git a/tmva/tmvagui/src/TMVAMultiClassGui.cxx b/tmva/tmvagui/src/TMVAMultiClassGui.cxx index fd22f3ba423f9..2642ec18b2054 100644 --- a/tmva/tmvagui/src/TMVAMultiClassGui.cxx +++ b/tmva/tmvagui/src/TMVAMultiClassGui.cxx @@ -189,63 +189,63 @@ void TMVA::TMVAMultiClassGui(const char* fName ,TString dataset) buttonType, defaultRequiredClassifier ); /* title = Form( "(%ic) Classifier Probability Distributions (test sample)", ic ); - MultiClassActionButton( cbar, + MultiClassActionButton( cbar, Form( "(%ic) Classifier Probability Distributions (test sample)", ic ), Form( "TMVA::mvas(\"%s\",TMVA::kProbaType)", fName ), "Plots the probability of each classifier for the test data (macro mvas(...,1))", buttonType, defaultRequiredClassifier ); - + title =Form( "(%id) Classifier Rarity Distributions (test sample)", ic ); - MultiClassActionButton( cbar, + MultiClassActionButton( cbar, Form( "(%id) Classifier Rarity Distributions (test sample)", ic ), Form( "TMVA::mvas(\"%s\",TMVA::kRarityType)", fName ), - "Plots the Rarity of each classifier for the test data (macro mvas(...,2)) - background distribution should be uniform", - buttonType, defaultRequiredClassifier ); - - + "Plots the Rarity of each classifier for the test data (macro mvas(...,2)) - background distribution should be + uniform", buttonType, defaultRequiredClassifier ); + + title =Form( "(%ia) Classifier Cut Efficiencies", ++ic ); - MultiClassActionButton( cbar, + MultiClassActionButton( cbar, title, Form( "TMVA::mvaeffs(\"%s\")", fName ), "Plots signal and background efficiencies versus cut on classifier output (macro mvaeffs.cxx)", buttonType, defaultRequiredClassifier ); + */ - title = Form( "(%ib) Classifier Background Rejection vs Signal Efficiency (ROC curve)", ic ); - MultiClassActionButton( cbar, - title, - Form( "TMVA::efficiencies(\"%s\")", fName ), - "Plots background rejection vs signal efficiencies (macro efficiencies.cxx) [\"ROC\" stands for \"Receiver Operation Characteristics\"]", - buttonType, defaultRequiredClassifier ); - - - title = Form( "(%i) Parallel Coordinates (requires ROOT-version >= 5.17)", ++ic ); - MultiClassActionButton( cbar, - title, - Form( "TMVA::paracoor(\"%s\")", fName ), - "Plots parallel coordinates for classifiers and input variables (macro paracoor.cxx, requires ROOT >= 5.17)", - buttonType, defaultRequiredClassifier ); + title = Form("(%i) Classifier Background Rejection vs Signal Efficiency (ROC curve)", ++ic); + MultiClassActionButton(cbar, title, Form("TMVA::efficienciesMulticlass(\"%s\", \"%s\")", dataset.Data(), fName), + "Plots background rejection vs signal efficiencies (macro efficiencies.cxx) [\"ROC\" stands " + "for \"Receiver Operation Characteristics\"]", + buttonType, defaultRequiredClassifier); - // parallel coordinates only exist since ROOT 5.17 - #if ROOT_VERSION_CODE < ROOT_VERSION(5,17,0) - TMVAMultiClassGui_inactiveButtons.push_back( title ); - #endif - - - title =Form( "(%i) PDFs of Classifiers (requires \"CreateMVAPdfs\" option set)", ++ic ); - MultiClassActionButton( cbar, - title, - Form( "TMVA::probas(\"%s\")", fName ), - "Plots the PDFs of the classifier output distributions for signal and background - if requested (macro probas.cxx)", - buttonType, defaultRequiredClassifier ); + /* + title = Form( "(%i) Parallel Coordinates (requires ROOT-version >= 5.17)", ++ic ); + MultiClassActionButton( cbar, + title, + Form( "TMVA::paracoor(\"%s\")", fName ), + "Plots parallel coordinates for classifiers and input variables (macro paracoor.cxx, requires ROOT >= 5.17)", + buttonType, defaultRequiredClassifier ); + + // parallel coordinates only exist since ROOT 5.17 + #if ROOT_VERSION_CODE < ROOT_VERSION(5,17,0) + TMVAMultiClassGui_inactiveButtons.push_back( title ); + #endif + + + title =Form( "(%i) PDFs of Classifiers (requires \"CreateMVAPdfs\" option set)", ++ic ); + MultiClassActionButton( cbar, + title, + Form( "TMVA::probas(\"%s\")", fName ), + "Plots the PDFs of the classifier output distributions for signal and background - if requested (macro probas.cxx)", + buttonType, defaultRequiredClassifier ); + + title = Form( "(%i) Likelihood Reference Distributiuons", ++ic); + MultiClassActionButton( cbar, + title, + Form( "TMVA::likelihoodrefs(\"%s\")", fName ), + "Plots to verify the likelihood reference distributions (macro likelihoodrefs.cxx)", + buttonType, "Likelihood" ); + */ - title = Form( "(%i) Likelihood Reference Distributiuons", ++ic); - MultiClassActionButton( cbar, - title, - Form( "TMVA::likelihoodrefs(\"%s\")", fName ), - "Plots to verify the likelihood reference distributions (macro likelihoodrefs.cxx)", - buttonType, "Likelihood" ); - */ - title = Form( "(%ia) Network Architecture (MLP)", ++ic ); TString call = Form( "TMVA::network(\"%s\",\"%s\")",dataset.Data() , fName ); MultiClassActionButton( cbar, diff --git a/tmva/tmvagui/src/efficienciesMulticlass.cxx b/tmva/tmvagui/src/efficienciesMulticlass.cxx new file mode 100644 index 0000000000000..483c0214b31ee --- /dev/null +++ b/tmva/tmvagui/src/efficienciesMulticlass.cxx @@ -0,0 +1,291 @@ +// @(#)Root/tmva $Id$ +// Author: Kim Albertsson +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVAGUI * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Implementation (see header for description) * + * * + * Authors : * + * Kim Albertsson - LTU & CERN * + * * + * Copyright (c) 2005-2017: * + * CERN, Switzerland * + * LTU, Sweden * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#include "TMVA/efficienciesMulticlass.h" + +#include "TFile.h" +#include "TGraph.h" +#include "TH2F.h" +#include "TIterator.h" +#include "TKey.h" + +//////////////////////////////////////////////////////////////////////////////// +/// +/// Note: This file assumes a certain structure on the input file. The structure +/// is as follows: +/// +/// - dataset (TDirectory) +/// - ... some variables, plots ... +/// - Method_XXX (TDirectory) +/// + XXX (TDirectory) +/// * ... some plots ... +/// * MVA_Method_XXX_Test_#classname# +/// * ... some plots ... +/// - Method_YYY (TDirectory) +/// + YYY (TDirectory) +/// * ... some plots ... +/// * MVA_Method_YYY_Test_#classname# +/// * ... some plots ... +/// - TestTree (TTree) +/// + ... data... +/// - TrainTree (TTree) +/// + ... data... +/// +/// Keeping this in mind makes the main loop in plotEfficienciesMulticlass easier +/// to follow :) +/// + +//////////////////////////////////////////////////////////////////////////////// +/// Wrapper for a canvas that also keeps track of color assignments for added +/// subgraphs. + +class EfficiencyPlotWrapper { + +public: + TCanvas *fCanvas; + TLegend *fLegend; + + TString fClassname; + Int_t fColor; + + UInt_t fNumMethods; + + EfficiencyPlotWrapper(TString classname); + Int_t addGraph(TGraph *graph); + + void addLegendEntry(TString methodTitle, TGraph *graph); + +private: + Float_t fx0L; + Float_t fdxL; + Float_t fy0H; + Float_t fdyH; + + TCanvas *newEfficiencyCanvas(TString className); + TLegend *newEfficiencyLegend(); +}; + +//////////////////////////////////////////////////////////////////////////////// +/// Constructs a new canvas + auxiliary data for showing an efficiency plot. +/// + +EfficiencyPlotWrapper::EfficiencyPlotWrapper(TString classname) +{ + // Legend extents (init before calling newEfficiencyLegend...) + fx0L = 0.107; + fy0H = 0.899; + fdxL = 0.457 - fx0L; + fdyH = 0.22; + fx0L = 0.15; + fy0H = 1 - fy0H + fdyH + 0.07; + + fColor = 1; + fNumMethods = 0; + + fClassname = classname; + fCanvas = newEfficiencyCanvas(classname); + fLegend = newEfficiencyLegend(); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Adds a new graph to the plot. The added graph should contain a single ROC +/// curve. +/// + +Int_t EfficiencyPlotWrapper::addGraph(TGraph *graph) +{ + graph->SetLineWidth(3); + graph->SetLineColor(fColor); + fColor++; + if (fColor == 5 || fColor == 10 || fColor == 11) { + fColor++; + } + + fCanvas->cd(); + graph->Draw(""); + fCanvas->Update(); + + ++fNumMethods; + + return fColor; +} + +//////////////////////////////////////////////////////////////////////////////// +/// WARNING: Uses the current color, thus the correct call ordering is: +/// plotWrapper->addGraph(...); +/// plotWrapper->addLegendEntry(...); +/// + +void EfficiencyPlotWrapper::addLegendEntry(TString methodTitle, TGraph *graph) +{ + fLegend->AddEntry(graph, methodTitle, "l"); + + Float_t dyH_local = fdyH * (Float_t(TMath::Min((UInt_t)10, fNumMethods) - 3.0) / 4.0); + fLegend->SetY2(fy0H + dyH_local); + + fLegend->Paint(); + fCanvas->Update(); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Helper to create new Canvas + +TCanvas *EfficiencyPlotWrapper::newEfficiencyCanvas(TString className) +{ + TString canvas_name = Form("%s_%s", className.Data(), "canvas"); + TString canvas_title = Form("ROC Curve %s", className.Data()); + TCanvas *c = new TCanvas(canvas_name, canvas_title, 200, 0, 650, 500); + // global style settings + c->SetGrid(); + c->SetTicks(); + + // Frame + TString xtit = "Signal Efficiency"; + TString ytit = "Background Rejection (1 - eff)"; + TString ftit = Form("Background Rejection vs Signal Efficiency %s", className.Data()); + Double_t x1 = 0.0; + Double_t x2 = 1.0; + Double_t y1 = 0.0; + Double_t y2 = 1.0; + + TH2F *frame = new TH2F(Form("%s_%s", className.Data(), "frame"), ftit, 500, x1, x2, 500, y1, y2); + frame->GetXaxis()->SetTitle(xtit); + frame->GetYaxis()->SetTitle(ytit); + TMVA::TMVAGlob::SetFrameStyle(frame, 1.0); + frame->DrawClone(); + + return c; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Helper to create new legend. + +TLegend *EfficiencyPlotWrapper::newEfficiencyLegend() +{ + TLegend *legend = new TLegend(fx0L, fy0H - fdyH, fx0L + fdxL, fy0H); + // legend->SetTextSize( 0.05 ); + legend->SetHeader("MVA Method:"); + legend->SetMargin(0.4); + legend->Draw(""); + + return legend; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Entry point. Called from the TMVAMulticlassGui Buttons +/// +/// @param dataset Dataset to operate on. Should be created by the TMVA Multiclass Factory. +/// @param filename_input Name of the input file procuded by a TMVA Multiclass Factory. +/// @param plotType Specified what kind of ROC curve to draw. Currently only rejB vs. effS is supported. + +void TMVA::efficienciesMulticlass(TString dataset, TString filename_input, EEfficiencyPlotType plotType, + Bool_t useTMVAStyle) +{ + // set style and remove existing canvas' + TMVAGlob::Initialize(useTMVAStyle); + + // checks if filename_input is already open, and if not opens one + TFile *file = TMVAGlob::OpenFile(filename_input); + if (file == nullptr) { + std::cout << "ERROR: filename \"" << filename_input << "\" is not found."; + return; + } + + plotEfficienciesMulticlass(plotType, file->GetDirectory(dataset.Data())); + + return; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Work horse function. Will operate on the currently open file (opened by +/// efficienciesMulticlass). +/// +/// @param plotType See effcienciesMulticlass. +/// @param binDir Directory in the file on which to operate. + +void TMVA::plotEfficienciesMulticlass(EEfficiencyPlotType plotType, TDirectory *binDir) +{ + // The current multiclass version implements only type 2 - rejB vs effS + if (plotType != EEfficiencyPlotType::kRejBvsEffS) { + std::cout << "Error: For multiclass, only rejB vs effS is currently implemented."; + } + + TString methodPrefix = "MVA_"; + TString graphNameRef = "rejBvsS"; + std::map classCanvasMap; + + TList methods; + UInt_t nm = TMVAGlob::GetListOfMethods(methods, binDir); + if (nm == 0) { + cout << "ups .. no methods found in to plot ROC curve for ... give up" << endl; + return; + } + // TIter next(file->GetListOfKeys()); + TIter next(&methods); + + // Loop over all method categories + TKey *key; + while ((key = (TKey *)next())) { + TDirectory *mDir = (TDirectory *)key->ReadObj(); + TList titles; + TMVAGlob::GetListOfTitles(mDir, titles); + + // Loop over each method within a category + TIter nextTitle(&titles); + TKey *titkey; + TDirectory *titDir; + while ((titkey = TMVAGlob::NextKey(nextTitle, "TDirectory"))) { + titDir = (TDirectory *)titkey->ReadObj(); + TString methodTitle; + TMVAGlob::GetMethodTitle(methodTitle, titDir); + + // Loop through all plots for the method + TIter nextKey(titDir->GetListOfKeys()); + TKey *hkey2; + while ((hkey2 = TMVAGlob::NextKey(nextKey, "TGraph"))) { + + TGraph *h = (TGraph *)hkey2->ReadObj(); + TString hname = h->GetName(); + if (hname.Contains(graphNameRef) && hname.BeginsWith(methodPrefix) && not hname.Contains("Train")) { + + // Extract classname from plot name + UInt_t index = hname.Last('_'); + TString classname = hname(index + 1, hname.Length() - (index + 1)); + + EfficiencyPlotWrapper *plotWrapper; + // Creating the class map lazily, TMVAGlob::GetClassNames is + // bugged and reports more classes than there are. This method + // does not. + try { + plotWrapper = classCanvasMap.at(classname); + } catch (...) { + plotWrapper = new EfficiencyPlotWrapper(classname); + classCanvasMap.emplace(classname.Data(), plotWrapper); + } + + plotWrapper->addGraph(h); + plotWrapper->addLegendEntry(methodTitle, h); + } + } + } + } +} \ No newline at end of file