From 9b99dcbfe97daa9de6def803bd6421a7dc65ddd4 Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Wed, 5 Apr 2017 12:31:13 +0200 Subject: [PATCH 1/3] [TDF] Add comments --- tree/treeplayer/inc/ROOT/TDataFrameInterface.hxx | 1 + tree/treeplayer/src/TDFNodes.cxx | 5 +++-- tree/treeplayer/src/TDataFrameInterface.cxx | 8 +++++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tree/treeplayer/inc/ROOT/TDataFrameInterface.hxx b/tree/treeplayer/inc/ROOT/TDataFrameInterface.hxx index bbe6a80804c35..b39b326b5ebd7 100644 --- a/tree/treeplayer/inc/ROOT/TDataFrameInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDataFrameInterface.hxx @@ -874,6 +874,7 @@ private: } // W != void: histogram w/ weights + // the case in which X has to be guessed but W was explicitly specified is not supported template TActionResultProxy<::TH1F> Histo1DImpl(W *, const BranchNames_t &bl, const std::shared_ptr<::TH1F> &h) { diff --git a/tree/treeplayer/src/TDFNodes.cxx b/tree/treeplayer/src/TDFNodes.cxx index 384d8eef5c84d..90b51e562e7b9 100644 --- a/tree/treeplayer/src/TDFNodes.cxx +++ b/tree/treeplayer/src/TDFNodes.cxx @@ -164,12 +164,13 @@ void TDataFrameImpl::Run() #endif // R__USE_IMT fHasRunAtLeastOnce = true; - // forget actions and "detach" the action result pointers marking them ready - // and forget them too + // forget actions fBookedActions.clear(); + // make all TActionResultProxies ready for (auto readiness : fResProxyReadiness) { *readiness.get() = true; } + // forget TActionResultProxies fResProxyReadiness.clear(); } diff --git a/tree/treeplayer/src/TDataFrameInterface.cxx b/tree/treeplayer/src/TDataFrameInterface.cxx index f8452f99ec6e9..613a2414461b7 100644 --- a/tree/treeplayer/src/TDataFrameInterface.cxx +++ b/tree/treeplayer/src/TDataFrameInterface.cxx @@ -19,6 +19,8 @@ namespace ROOT { namespace Internal { +// Match expression against names of branches passed as parameter +// Return vector of names of the branches used in the expression std::vector GetUsedBranchesNames(const std::string expression, TObjArray *branches, const std::vector &tmpBranches) { @@ -46,6 +48,8 @@ std::vector GetUsedBranchesNames(const std::string expression, TObj return usedBranches; } +// Jit a string filter or a string temporary column, call this->AddColumn or this->Filter as needed +// Return pointer to the new functional chain node returned by the call, cast to Long_t Long_t InterpretCall(void *thisPtr, const std::string &methodName, const std::string &nodeTypeName, const std::string &name, const std::string &expression, TObjArray *branches, const std::vector &tmpBranches, @@ -147,6 +151,8 @@ Long_t InterpretCall(void *thisPtr, const std::string &methodName, const std::st return retVal; } +// Jit and call "this->Action(params...)" for all actions that support branch type inference +// Return pointer to corresponding TActionResultProxy, cast to Long_t Long_t CreateActionGuessed(const BranchNames_t &bl, const std::string &nodeTypename, void *thisPtr, const std::type_info &art, const std::type_info &at, const void *r, TTree *tree, ROOT::Detail::TDataFrameBranchBase *bbase) @@ -194,7 +200,7 @@ Long_t CreateActionGuessed(const BranchNames_t &bl, const std::string &nodeTypen } return retVal; } -} +} // namespace Internal namespace Experimental { From 05165791bebb0636bacd81fc4488af9b48a0f6e7 Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Wed, 5 Apr 2017 12:46:57 +0200 Subject: [PATCH 2/3] [TDF] Fix tables in user guide Markdown tables seem to not like line breaks inside a cell. Clang-format, on the other hand, did not like our very long lines, breaking the docs. --- tree/treeplayer/src/TDataFrame.cxx | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/tree/treeplayer/src/TDataFrame.cxx b/tree/treeplayer/src/TDataFrame.cxx index 257d9cc5b7636..d0f575864f0cf 100644 --- a/tree/treeplayer/src/TDataFrame.cxx +++ b/tree/treeplayer/src/TDataFrame.cxx @@ -429,25 +429,16 @@ note that all actions are only executed for events that pass all preceding filte | Mean | Return the mean of processed branch values. | | Min | Return the minimum of processed branch values. | | Profile{1D,2D} | Fill a {one,two}-dimensional profile with the branch values that passed all filters. | -| Reduce | Reduce (e.g. sum, merge) entries using the function (lambda, functor...) passed as argument. The function -must have signature `T(T,T)` where `T` is the type of the branch. Return the final result of the reduction operation. An -optional parameter allows initialization of the result object to non-default values. | +| Reduce | Reduce (e.g. sum, merge) entries using the function (lambda, functor...) passed as argument. The function must have signature `T(T,T)` where `T` is the type of the branch. Return the final result of the reduction operation. An optional parameter allows initialization of the result object to non-default values. | | **Instant actions** | **Description** | |---------------------|-----------------| -| Foreach | Execute a user-defined function on each entry. Users are responsible for the thread-safety of this lambda -when executing with implicit multi-threading enabled. | -| ForeachSlot | Same as `Foreach`, but the user-defined function must take an extra `unsigned int slot` as its first -parameter. `slot` will take a different value, `0` to `nThreads - 1`, for each thread of execution. This is meant as a -helper in writing thread-safe `Foreach` actions when using `TDataFrame` after `ROOT::EnableImplicitMT()`. `ForeachSlot` -works just as well with single-thread execution: in that case `slot` will always be `0`. | +| Foreach | Execute a user-defined function on each entry. Users are responsible for the thread-safety of this lambda when executing with implicit multi-threading enabled. | +| ForeachSlot | Same as `Foreach`, but the user-defined function must take an extra `unsigned int slot` as its first parameter. `slot` will take a different value, `0` to `nThreads - 1`, for each thread of execution. This is meant as a helper in writing thread-safe `Foreach` actions when using `TDataFrame` after `ROOT::EnableImplicitMT()`. `ForeachSlot` works just as well with single-thread execution: in that case `slot` will always be `0`. | | **Queries** | **Description** | |-----------|-----------------| -| Report | This is not properly an action, since when `Report` is called it does not book an operation to be performed -on each entry. Instead, it interrogates the data-frame directly to print a cutflow report, i.e. statistics on how many -entries have been accepted and rejected by the filters. See the section on [named -filters](#named-filters-and-cutflow-reports) for a more detailed explanation. | +| Report | This is not properly an action, since when `Report` is called it does not book an operation to be performed on each entry. Instead, it interrogates the data-frame directly to print a cutflow report, i.e. statistics on how many entries have been accepted and rejected by the filters. See the section on [named filters](#named-filters-and-cutflow-reports) for a more detailed explanation. | ## Parallel execution As pointed out before in this document, `TDataFrame` can transparently perform multi-threaded event loops to speed up From 535ef547538ad7ac30c4e5278327fdd2868a9e00 Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Wed, 29 Mar 2017 14:06:18 +0200 Subject: [PATCH 3/3] [TDF] Run clang-format on all TDataFrame tutorials --- tutorials/dataframe/tdf001_introduction.C | 50 ++++++----- tutorials/dataframe/tdf002_dataModel.C | 63 +++++++------- tutorials/dataframe/tdf003_profiles.C | 28 +++--- tutorials/dataframe/tdf004_cutFlowReport.C | 30 ++++--- tutorials/dataframe/tdf005_fillAnyObject.C | 18 ++-- tutorials/dataframe/tdf006_ranges.C | 22 ++--- tutorials/dataframe/tdf101_h1Analysis.C | 99 +++++++++++----------- 7 files changed, 156 insertions(+), 154 deletions(-) diff --git a/tutorials/dataframe/tdf001_introduction.C b/tutorials/dataframe/tdf001_introduction.C index 3fe3c0033a2ee..f60d0298ff750 100644 --- a/tutorials/dataframe/tdf001_introduction.C +++ b/tutorials/dataframe/tdf001_introduction.C @@ -22,16 +22,17 @@ // A simple helper function to fill a test tree: this makes the example // stand-alone. -void fill_tree(const char* filename, const char* treeName) { - TFile f(filename,"RECREATE"); - TTree t(treeName,treeName); +void fill_tree(const char *filename, const char *treeName) +{ + TFile f(filename, "RECREATE"); + TTree t(treeName, treeName); double b1; int b2; t.Branch("b1", &b1); t.Branch("b2", &b2); - for(int i = 0; i < 10; ++i) { + for (int i = 0; i < 10; ++i) { b1 = i; - b2 = i*i; + b2 = i * i; t.Fill(); } t.Write(); @@ -39,18 +40,19 @@ void fill_tree(const char* filename, const char* treeName) { return; } -int tdf001_introduction() { +int tdf001_introduction() +{ // We prepare an input tree to run on auto fileName = "tdf001_introduction.root"; auto treeName = "myTree"; - fill_tree(fileName,treeName); + fill_tree(fileName, treeName); // We read the tree from the file and create a TDataFrame, a class that // allows us to interact with the data contained in the tree. // We select a default column, a *branch* to adopt ROOT jargon, which will // be looked at if none is specified by the user when dealing with filters - //and actions. + // and actions. ROOT::Experimental::TDataFrame d(treeName, fileName, {"b1"}); // ## Operations on the dataframe @@ -68,8 +70,8 @@ int tdf001_introduction() { // filters. Here we show how the automatic selection of the column kicks // in in case the user specifies none. auto entries1 = d.Filter(cutb1) // <- no column name specified here! - .Filter(cutb1b2, {"b2","b1"}) - .Count(); + .Filter(cutb1b2, {"b2", "b1"}) + .Count(); std::cout << *entries1 << " entries passed all filters" << std::endl; @@ -82,13 +84,13 @@ int tdf001_introduction() { // ### `Min`, `Max` and `Mean` actions // These actions allow to retrieve statistical information about the entries // passing the cuts, if any. - auto b1b2_cut = d.Filter(cutb1b2, {"b2","b1"}); + auto b1b2_cut = d.Filter(cutb1b2, {"b2", "b1"}); auto minVal = b1b2_cut.Min(); auto maxVal = b1b2_cut.Max(); auto meanVal = b1b2_cut.Mean(); auto nonDefmeanVal = b1b2_cut.Mean("b2"); // <- Column is not the default - std::cout << "The mean is always included between the min and the max: " - << *minVal << " <= " << *meanVal << " <= " << *maxVal << std::endl; + std::cout << "The mean is always included between the min and the max: " << *minVal << " <= " << *meanVal + << " <= " << *maxVal << std::endl; // ### `Take` action // The `Take` action allows to retrieve all values of the variable stored in a @@ -99,8 +101,7 @@ int tdf001_introduction() { auto b1Vec = b1_cut.Take>(); std::cout << "Selected b1 entries" << std::endl; - for(auto b1_entry : *b1List) - std::cout << b1_entry << " "; + for (auto b1_entry : *b1List) std::cout << b1_entry << " "; std::cout << std::endl; auto b1VecCl = TClass::GetClass(typeid(*b1Vec)); std::cout << "The type of b1Vec is" << b1VecCl->GetName() << std::endl; @@ -118,8 +119,7 @@ int tdf001_introduction() { // In this case we fill a histogram. In some sense this is a violation of a // purely functional paradigm - C++ allows to do that. TH1F h("h", "h", 12, -1, 11); - d.Filter([](int b2) { return b2 % 2 == 0; }, {"b2"}) - .Foreach([&h](double b1) { h.Fill(b1); }); + d.Filter([](int b2) { return b2 % 2 == 0; }, {"b2"}).Foreach([&h](double b1) { h.Fill(b1); }); std::cout << "Filled h with " << h.GetEntries() << " entries" << std::endl; @@ -131,8 +131,8 @@ int tdf001_introduction() { // writing the entire pipeline on one line. This can be easily achieved. // We'll show this re-working the `Count` example: auto cutb1_result = d.Filter(cutb1); - auto cutb1b2_result = d.Filter(cutb1b2, {"b2","b1"}); - auto cutb1_cutb1b2_result = cutb1_result.Filter(cutb1b2, {"b2","b1"}); + auto cutb1b2_result = d.Filter(cutb1b2, {"b2", "b1"}); + auto cutb1_cutb1b2_result = cutb1_result.Filter(cutb1b2, {"b2", "b1"}); // Now we want to count: auto evts_cutb1_result = cutb1_result.Count(); auto evts_cutb1b2_result = cutb1b2_result.Count(); @@ -156,22 +156,20 @@ int tdf001_introduction() { // any value of any type. // Let's dive in an example: auto entries_sum = d.AddColumn("sum", [](double b1, int b2) { return b2 + b1; }, {"b1", "b2"}) - .Filter([](double sum) { return sum > 4.2; }, {"sum"}) - .Count(); + .Filter([](double sum) { return sum > 4.2; }, {"sum"}) + .Count(); std::cout << *entries_sum << std::endl; // Additional columns can be expressed as strings. The content must be C++ // code. The name of the variables must be the name of the branches. The code // is just in time compiled. - auto entries_sum2 = d.AddColumn("sum", "b1 + b2") - .Filter("sum > 4.2") - .Count(); + auto entries_sum2 = d.AddColumn("sum", "b1 + b2").Filter("sum > 4.2").Count(); std::cout << *entries_sum2 << std::endl; return 0; } -int main(){ +int main() +{ return tdf001_introduction(); } - diff --git a/tutorials/dataframe/tdf002_dataModel.C b/tutorials/dataframe/tdf002_dataModel.C index 8cd6d90f8fa8b..b6d14a640b91f 100644 --- a/tutorials/dataframe/tdf002_dataModel.C +++ b/tutorials/dataframe/tdf002_dataModel.C @@ -30,13 +30,14 @@ using CylFourVector = ROOT::Math::RhoEtaPhiVector; // A simple helper function to fill a test tree: this makes the example // stand-alone. -void fill_tree(const char* filename, const char* treeName) { - TFile f(filename,"RECREATE"); - TTree t(treeName,treeName); +void fill_tree(const char *filename, const char *treeName) +{ + TFile f(filename, "RECREATE"); + TTree t(treeName, treeName); FourVectors tracks; t.Branch("tracks", &tracks); - const double M = 0.13957; // set pi+ mass + const double M = 0.13957; // set pi+ mass TRandom3 R(1); for (int i = 0; i < 50; ++i) { @@ -44,15 +45,15 @@ void fill_tree(const char* filename, const char* treeName) { tracks.clear(); tracks.reserve(nPart); for (int j = 0; j < nPart; ++j) { - double px = R.Gaus(0,10); - double py = R.Gaus(0,10); - double pt = sqrt(px*px +py*py); - double eta = R.Uniform(-3,3); - double phi = R.Uniform(0.0 , 2*TMath::Pi() ); - CylFourVector vcyl( pt, eta, phi); + double px = R.Gaus(0, 10); + double py = R.Gaus(0, 10); + double pt = sqrt(px * px + py * py); + double eta = R.Uniform(-3, 3); + double phi = R.Uniform(0.0, 2 * TMath::Pi()); + CylFourVector vcyl(pt, eta, phi); // set energy - double E = sqrt( vcyl.R()*vcyl.R() + M*M); - FourVector q( vcyl.X(), vcyl.Y(), vcyl.Z(), E); + double E = sqrt(vcyl.R() * vcyl.R() + M * M); + FourVector q(vcyl.X(), vcyl.Y(), vcyl.Z(), E); // fill track vector tracks.emplace_back(q); } @@ -64,12 +65,13 @@ void fill_tree(const char* filename, const char* treeName) { return; } -int tdf002_dataModel() { +int tdf002_dataModel() +{ // We prepare an input tree to run on auto fileName = "tdf002_dataModel.root"; auto treeName = "myTree"; - fill_tree(fileName,treeName); + fill_tree(fileName, treeName); // We read the tree from the file and create a TDataFrame, a class that // allows us to interact with the data contained in the tree. @@ -78,9 +80,8 @@ int tdf002_dataModel() { // ## Operating on branches which are collection of objects // Here we deal with the simplest of the cuts: we decide to accept the event // only if the number of tracks is greater than 5. - auto n_cut = [](const FourVectors & tracks) { return tracks.size() > 8; }; - auto nentries = d.Filter(n_cut, {"tracks"}) - .Count(); + auto n_cut = [](const FourVectors &tracks) { return tracks.size() > 8; }; + auto nentries = d.Filter(n_cut, {"tracks"}).Count(); std::cout << *nentries << " passed all filters" << std::endl; @@ -88,29 +89,27 @@ int tdf002_dataModel() { // quantity we are interested in. // In this example, we will cut on the number of tracks and plot their // transverse momentum. - auto getPt = [](const FourVectors& tracks) { + auto getPt = [](const FourVectors &tracks) { std::vector pts; pts.reserve(tracks.size()); - for (auto& t:tracks) - pts.emplace_back(t.Pt()); + for (auto &t : tracks) pts.emplace_back(t.Pt()); return pts; - }; + }; // We do the same for the weights. - auto getPtWeights = [](const FourVectors& tracks) { + auto getPtWeights = [](const FourVectors &tracks) { std::vector ptsw; ptsw.reserve(tracks.size()); - for (auto& t:tracks) - ptsw.emplace_back(1./t.Pt()); + for (auto &t : tracks) ptsw.emplace_back(1. / t.Pt()); return ptsw; - }; + }; - auto augmented_d = d.AddColumn("tracks_n", [](const FourVectors& tracks){return (int)tracks.size();}) - .Filter([](int tracks_n){return tracks_n > 2;}, {"tracks_n"}) - .AddColumn("tracks_pts", getPt) - .AddColumn("tracks_pts_weights", getPtWeights); + auto augmented_d = d.AddColumn("tracks_n", [](const FourVectors &tracks) { return (int)tracks.size(); }) + .Filter([](int tracks_n) { return tracks_n > 2; }, {"tracks_n"}) + .AddColumn("tracks_pts", getPt) + .AddColumn("tracks_pts_weights", getPtWeights); - auto trN = augmented_d.Histo1D("tracks_n",40,-.5,39.5); + auto trN = augmented_d.Histo1D("tracks_n", 40, -.5, 39.5); auto trPts = augmented_d.Histo1D("tracks_pts"); auto trWPts = augmented_d.Histo1D, std::vector>("tracks_pts", "tracks_pts_weights"); @@ -129,7 +128,7 @@ int tdf002_dataModel() { return 0; } -int main(){ +int main() +{ return tdf002_dataModel(); } - diff --git a/tutorials/dataframe/tdf003_profiles.C b/tutorials/dataframe/tdf003_profiles.C index c366e48d6850d..8ab7c54f9970a 100644 --- a/tutorials/dataframe/tdf003_profiles.C +++ b/tutorials/dataframe/tdf003_profiles.C @@ -19,19 +19,19 @@ // A simple helper function to fill a test tree: this makes the example // stand-alone. -void fill_tree(const char* filename, const char* treeName) +void fill_tree(const char *filename, const char *treeName) { - TFile f(filename,"RECREATE"); - TTree t(treeName,treeName); + TFile f(filename, "RECREATE"); + TTree t(treeName, treeName); float px, py, pz; t.Branch("px", &px); t.Branch("py", &py); t.Branch("pz", &pz); - for (int i=0; i<25000; i++) { - gRandom->Rannor(px,py); - pz = px*px + py*py; + for (int i = 0; i < 25000; i++) { + gRandom->Rannor(px, py); + pz = px * px + py * py; t.Fill(); - } + } t.Write(); f.Close(); return; @@ -42,20 +42,20 @@ void tdf003_profiles() // We prepare an input tree to run on auto fileName = "tdf003_profiles.root"; auto treeName = "myTree"; - fill_tree(fileName,treeName); + fill_tree(fileName, treeName); // We read the tree from the file and create a TDataFrame. - ROOT::Experimental::TDataFrame d(treeName, fileName, {"px","py","pz"}); + ROOT::Experimental::TDataFrame d(treeName, fileName, {"px", "py", "pz"}); // Create the profiles - auto hprof1d = d.Profile1D(TProfile("hprof1d","Profile of pz versus px",64,-4,4)); - auto hprof2d = d.Profile2D(TProfile2D("hprof2d","Profile of pz versus px and py",40,-4,4,40,-4,4,0,20)); + auto hprof1d = d.Profile1D(TProfile("hprof1d", "Profile of pz versus px", 64, -4, 4)); + auto hprof2d = d.Profile2D( + TProfile2D("hprof2d", "Profile of pz versus px and py", 40, -4, 4, 40, -4, 4, 0, 20)); // And Draw - auto c1 = new TCanvas("c1","Profile histogram example",200,10,700,500); + auto c1 = new TCanvas("c1", "Profile histogram example", 200, 10, 700, 500); hprof1d->DrawClone(); - auto c2 = new TCanvas("c2","Profile2D histogram example",200,10,700,500); + auto c2 = new TCanvas("c2", "Profile2D histogram example", 200, 10, 700, 500); c2->cd(); hprof2d->DrawClone(); - } diff --git a/tutorials/dataframe/tdf004_cutFlowReport.C b/tutorials/dataframe/tdf004_cutFlowReport.C index 8232d82d5835c..6ec9f73700062 100644 --- a/tutorials/dataframe/tdf004_cutFlowReport.C +++ b/tutorials/dataframe/tdf004_cutFlowReport.C @@ -25,16 +25,17 @@ using CylFourVector = ROOT::Math::RhoEtaPhiVector; // A simple helper function to fill a test tree: this makes the example // stand-alone. -void fill_tree(const char* filename, const char* treeName) { - TFile f(filename,"RECREATE"); - TTree t(treeName,treeName); +void fill_tree(const char *filename, const char *treeName) +{ + TFile f(filename, "RECREATE"); + TTree t(treeName, treeName); double b1; int b2; t.Branch("b1", &b1); t.Branch("b2", &b2); - for(int i = 0; i < 50; ++i) { + for (int i = 0; i < 50; ++i) { b1 = i; - b2 = i*i; + b2 = i * i; t.Fill(); } t.Write(); @@ -42,12 +43,13 @@ void fill_tree(const char* filename, const char* treeName) { return; } -void tdf004_cutFlowReport() { +void tdf004_cutFlowReport() +{ // We prepare an input tree to run on auto fileName = "tdf004_cutFlowReport.root"; auto treeName = "myTree"; - fill_tree(fileName,treeName); + fill_tree(fileName, treeName); // We read the tree from the file and create a TDataFrame ROOT::Experimental::TDataFrame d(treeName, fileName, {"b1", "b2"}); @@ -62,22 +64,26 @@ void tdf004_cutFlowReport() { auto filtered1 = d.Filter(cut1, {"b1"}, "Cut1"); auto filtered2 = d.Filter(cut2, {"b2"}, "Cut2"); - auto augmented1 = filtered2.AddColumn("b3", [](double b1, int b2){ return b1/b2;}); + auto augmented1 = filtered2.AddColumn("b3", [](double b1, int b2) { return b1 / b2; }); auto cut3 = [](double x) { return x < .5; }; auto filtered3 = augmented1.Filter(cut3, {"b3"}, "Cut3"); // Statistics are retrieved through a call to the Report method: - // when Report is called on the main TDataFrame object, it prints stats for all named filters declared up to that point - // when called on a stored chain state (i.e. a chain/graph node), it prints stats for all named filters in the section + // when Report is called on the main TDataFrame object, it prints stats for all named filters declared up to that + // point + // when called on a stored chain state (i.e. a chain/graph node), it prints stats for all named filters in the + // section // of the chain between the main TDataFrame and that node (included). - // Stats are printed in the same order as named filters have been added to the graph, and refer to the latest event-loop that has been run using the relevant TDataFrame. + // Stats are printed in the same order as named filters have been added to the graph, and refer to the latest + // event-loop that has been run using the relevant TDataFrame. std::cout << "Cut3 stats:" << std::endl; filtered3.Report(); std::cout << "All stats:" << std::endl; d.Report(); } -int main(){ +int main() +{ tdf004_cutFlowReport(); return 0; } diff --git a/tutorials/dataframe/tdf005_fillAnyObject.C b/tutorials/dataframe/tdf005_fillAnyObject.C index b54cfb753b197..cdd09d35a7582 100644 --- a/tutorials/dataframe/tdf005_fillAnyObject.C +++ b/tutorials/dataframe/tdf005_fillAnyObject.C @@ -16,16 +16,17 @@ // A simple helper function to fill a test tree: this makes the example // stand-alone. -void fill_tree(const char* filename, const char* treeName) { - TFile f(filename,"RECREATE"); - TTree t(treeName,treeName); +void fill_tree(const char *filename, const char *treeName) +{ + TFile f(filename, "RECREATE"); + TTree t(treeName, treeName); double b1; float b2; t.Branch("b1", &b1); t.Branch("b2", &b2); - for(int i = 0; i < 100; ++i) { + for (int i = 0; i < 100; ++i) { b1 = i; - b2 = i*i; + b2 = i * i; t.Fill(); } t.Write(); @@ -33,12 +34,13 @@ void fill_tree(const char* filename, const char* treeName) { return; } -int tdf005_fillAnyObject() { +int tdf005_fillAnyObject() +{ // We prepare an input tree to run on auto fileName = "tdf005_fillAnyObject.root"; auto treeName = "myTree"; - fill_tree(fileName,treeName); + fill_tree(fileName, treeName); // We read the tree from the file and create a TDataFrame. ROOT::Experimental::TDataFrame d(treeName, fileName); @@ -48,7 +50,7 @@ int tdf005_fillAnyObject() { // `Fill` method with some input arguments. auto th1d = d.Fill(TH1D("th1d", "th1d", 64, 0, 128), {"b1"}); auto th1i = d.Fill(TH1I("th1i", "th1i", 64, 0, 128), {"b2"}); - auto th2d = d.Fill(TH2D("th2d", "th2d", 64, 0, 128, 64, 0, 1024), {"b1","b2"}); + auto th2d = d.Fill(TH2D("th2d", "th2d", 64, 0, 128, 64, 0, 1024), {"b1", "b2"}); auto c1 = new TCanvas(); th1d->DrawClone(); diff --git a/tutorials/dataframe/tdf006_ranges.C b/tutorials/dataframe/tdf006_ranges.C index 1a4c44b9c353b..b7be011590c9b 100644 --- a/tutorials/dataframe/tdf006_ranges.C +++ b/tutorials/dataframe/tdf006_ranges.C @@ -15,16 +15,17 @@ // A simple helper function to fill a test tree: this makes the example // stand-alone. -void fill_tree(const char* filename, const char* treeName) { - TFile f(filename,"RECREATE"); - TTree t(treeName,treeName); +void fill_tree(const char *filename, const char *treeName) +{ + TFile f(filename, "RECREATE"); + TTree t(treeName, treeName); int b1; float b2; t.Branch("b1", &b1); t.Branch("b2", &b2); - for(int i = 0; i < 100; ++i) { + for (int i = 0; i < 100; ++i) { b1 = i; - b2 = i*i; + b2 = i * i; t.Fill(); } t.Write(); @@ -32,12 +33,13 @@ void fill_tree(const char* filename, const char* treeName) { return; } -int tdf006_ranges() { +int tdf006_ranges() +{ // We prepare an input tree to run on auto fileName = "tdf006_ranges.root"; auto treeName = "myTree"; - fill_tree(fileName,treeName); + fill_tree(fileName, treeName); // We read the tree from the file and create a TDataFrame. ROOT::Experimental::TDataFrame d(treeName, fileName); @@ -66,7 +68,7 @@ int tdf006_ranges() { // An important thing to notice is that the counts of a filter are relative to the // number of entries a filter "sees". Therefore, if a Range depends on a filter, // the Range will act on the entries passing the filter only. - auto c_0_3_after_even_b1 = d.Filter("0 == b1 % 2").Range(0,3).Count(); + auto c_0_3_after_even_b1 = d.Filter("0 == b1 % 2").Range(0, 3).Count(); // Ok, time to wrap up: let's print all counts! cout << "Usage of ranges:\n" @@ -75,9 +77,7 @@ int tdf006_ranges() { << " - Entries from 15 onwards: " << *c_15_end << endl << " - Entries from 15 onwards in steps of 3: " << *c_15_end_3 << endl << " - Entries from 0 to 50, odd only: " << *c_0_50_odd_b1 << endl - << " - First three entries of all even entries: " << *c_0_3_after_even_b1 < 2.5; }, {"ptds_d"}) - .Filter([](float etads_d) { return TMath::Abs(etads_d) < 1.5; }, {"etads_d"}) - .Filter([](int ik, int ipi, std::array_view nhitrp) { return nhitrp[ik-1] * nhitrp[ipi-1] > 1; }, - {"ik", "ipi", "nhitrp"}) - .Filter([](int ik, std::array_view rstart, std::array_view rend) { - return rend[ik-1] - rstart[ik-1] > 22; }, - { "ik", "rstart", "rend"}) - .Filter([](int ipi, std::array_view rstart, std::array_view rend) { - return rend[ipi-1] - rstart[ipi-1] > 22; }, - {"ipi", "rstart", "rend"}) - .Filter([](int ik, std::array_view nlhk) { return nlhk[ik-1] > 0.1; }, {"ik", "nlhk"}) - .Filter([](int ipi, std::array_view nlhpi) { return nlhpi[ipi-1] > 0.1; }, {"ipi", "nlhpi"}) - .Filter([](int ipis, std::array_view nlhpi) { return nlhpi[ipis - 1] > 0.1; }, {"ipis", "nlhpi"}) - .Filter([](int njets) { return njets >= 1; }, {"njets"}); +auto Select = [](ROOT::Experimental::TDataFrame &dataFrame) { + auto ret = + dataFrame.Filter([](float md0_d) { return TMath::Abs(md0_d - 1.8646) < 0.04; }, {"md0_d"}) + .Filter([](float ptds_d) { return ptds_d > 2.5; }, {"ptds_d"}) + .Filter([](float etads_d) { return TMath::Abs(etads_d) < 1.5; }, {"etads_d"}) + .Filter([](int ik, int ipi, std::array_view nhitrp) { return nhitrp[ik - 1] * nhitrp[ipi - 1] > 1; }, + {"ik", "ipi", "nhitrp"}) + .Filter([](int ik, std::array_view rstart, + std::array_view rend) { return rend[ik - 1] - rstart[ik - 1] > 22; }, + {"ik", "rstart", "rend"}) + .Filter([](int ipi, std::array_view rstart, + std::array_view rend) { return rend[ipi - 1] - rstart[ipi - 1] > 22; }, + {"ipi", "rstart", "rend"}) + .Filter([](int ik, std::array_view nlhk) { return nlhk[ik - 1] > 0.1; }, {"ik", "nlhk"}) + .Filter([](int ipi, std::array_view nlhpi) { return nlhpi[ipi - 1] > 0.1; }, {"ipi", "nlhpi"}) + .Filter([](int ipis, std::array_view nlhpi) { return nlhpi[ipis - 1] > 0.1; }, {"ipis", "nlhpi"}) + .Filter([](int njets) { return njets >= 1; }, {"njets"}); return ret; }; -const Double_t dxbin = (0.17-0.13)/40; // Bin-width +const Double_t dxbin = (0.17 - 0.13) / 40; // Bin-width Double_t fdm5(Double_t *xx, Double_t *par) { Double_t x = xx[0]; if (x <= 0.13957) return 0; - Double_t xp3 = (x-par[3])*(x-par[3]); - Double_t res = dxbin*(par[0]*pow(x-0.13957, par[1]) - + par[2] / 2.5066/par[4]*exp(-xp3/2/par[4]/par[4])); + Double_t xp3 = (x - par[3]) * (x - par[3]); + Double_t res = + dxbin * (par[0] * pow(x - 0.13957, par[1]) + par[2] / 2.5066 / par[4] * exp(-xp3 / 2 / par[4] / par[4])); return res; } @@ -56,40 +55,39 @@ Double_t fdm2(Double_t *xx, Double_t *par) static const Double_t sigma = 0.0012; Double_t x = xx[0]; if (x <= 0.13957) return 0; - Double_t xp3 = (x-0.1454)*(x-0.1454); - Double_t res = dxbin*(par[0]*pow(x-0.13957, 0.25) - + par[1] / 2.5066/sigma*exp(-xp3/2/sigma/sigma)); + Double_t xp3 = (x - 0.1454) * (x - 0.1454); + Double_t res = dxbin * (par[0] * pow(x - 0.13957, 0.25) + par[1] / 2.5066 / sigma * exp(-xp3 / 2 / sigma / sigma)); return res; } -void FitAndPlotHdmd(TH1& hdmd) { - //create the canvas for the h1analysis fit +void FitAndPlotHdmd(TH1 &hdmd) +{ + // create the canvas for the h1analysis fit gStyle->SetOptFit(); - TCanvas *c1 = new TCanvas("c1","h1analysis analysis",10,10,800,600); + TCanvas *c1 = new TCanvas("c1", "h1analysis analysis", 10, 10, 800, 600); c1->SetBottomMargin(0.15); hdmd.GetXaxis()->SetTitle("m_{K#pi#pi} - m_{K#pi}[GeV/c^{2}]"); hdmd.GetXaxis()->SetTitleOffset(1.4); - //fit histogram hdmd with function f5 using the loglikelihood option - if (gROOT->GetListOfFunctions()->FindObject("f5")) - delete gROOT->GetFunction("f5"); - TF1 *f5 = new TF1("f5",fdm5,0.139,0.17,5); + // fit histogram hdmd with function f5 using the loglikelihood option + if (gROOT->GetListOfFunctions()->FindObject("f5")) delete gROOT->GetFunction("f5"); + TF1 *f5 = new TF1("f5", fdm5, 0.139, 0.17, 5); f5->SetParameters(1000000, .25, 2000, .1454, .001); - hdmd.Fit("f5","lr"); + hdmd.Fit("f5", "lr"); // Have the number of entries on the first histogram (to cross check when running // with entry lists) TPaveStats *psdmd = (TPaveStats *)hdmd.GetListOfFunctions()->FindObject("stats"); - if (psdmd) - psdmd->SetOptStat(1110); + if (psdmd) psdmd->SetOptStat(1110); c1->Modified(); } -void FitAndPlotH2(TH2& h2) { - //create the canvas for tau d0 +void FitAndPlotH2(TH2 &h2) +{ + // create the canvas for tau d0 gStyle->SetOptFit(0); gStyle->SetOptStat(1100); - TCanvas *c2 = new TCanvas("c2","tauD0",100,100,800,600); + TCanvas *c2 = new TCanvas("c2", "tauD0", 100, 100, 800, 600); c2->SetGrid(); c2->SetBottomMargin(0.15); @@ -97,22 +95,22 @@ void FitAndPlotH2(TH2& h2) { // with function f2 and make a histogram for each fit parameter // Note that the generated histograms are added to the list of objects // in the current directory. - if (gROOT->GetListOfFunctions()->FindObject("f2")) - delete gROOT->GetFunction("f2"); - TF1 *f2 = new TF1("f2",fdm2,0.139,0.17,2); + if (gROOT->GetListOfFunctions()->FindObject("f2")) delete gROOT->GetFunction("f2"); + TF1 *f2 = new TF1("f2", fdm2, 0.139, 0.17, 2); f2->SetParameters(10000, 10); - h2.FitSlicesX(f2,0,-1,1,"qln"); + h2.FitSlicesX(f2, 0, -1, 1, "qln"); - TH1D *h2_1 = (TH1D*)gDirectory->Get("h2_1"); + TH1D *h2_1 = (TH1D *)gDirectory->Get("h2_1"); h2_1->GetXaxis()->SetTitle("#tau[ps]"); h2_1->SetMarkerStyle(21); h2_1->Draw(); c2->Update(); - TLine *line = new TLine(0,0,0,c2->GetUymax()); + TLine *line = new TLine(0, 0, 0, c2->GetUymax()); line->Draw(); } -void tdf101_h1Analysis() { +void tdf101_h1Analysis() +{ TChain chain("h42"); chain.Add("http://root.cern.ch/files/h1/dstarmb.root"); chain.Add("http://root.cern.ch/files/h1/dstarp1a.root"); @@ -122,13 +120,12 @@ void tdf101_h1Analysis() { ROOT::Experimental::TDataFrame dataFrame(chain); auto selected = Select(dataFrame); - auto hdmdARP = selected.Histo1D(TH1F("hdmd", "Dm_d",40,0.13,0.17), "dm_d"); - auto selectedAddedBranch = selected.AddColumn("h2_y", - [](float rpd0_t, float ptd0_d){return rpd0_t/0.029979f*1.8646f/ptd0_d;}, - {"rpd0_t", "ptd0_d"}); - auto h2ARP = selectedAddedBranch.Histo2D(TH2F("h2","ptD0 vs Dm_d",30,0.135,0.165,30,-3,6), "dm_d", "h2_y"); + auto hdmdARP = selected.Histo1D(TH1F("hdmd", "Dm_d", 40, 0.13, 0.17), "dm_d"); + auto selectedAddedBranch = selected.AddColumn( + "h2_y", [](float rpd0_t, float ptd0_d) { return rpd0_t / 0.029979f * 1.8646f / ptd0_d; }, {"rpd0_t", "ptd0_d"}); + auto h2ARP = selectedAddedBranch.Histo2D(TH2F("h2", "ptD0 vs Dm_d", 30, 0.135, 0.165, 30, -3, 6), + "dm_d", "h2_y"); FitAndPlotHdmd(*hdmdARP); FitAndPlotH2(*h2ARP); } -