root-project · dpiparo · Apr 5, 2017 · Apr 5, 2017 · Apr 5, 2017 · Mar 29, 2017
@@ -874,6 +874,7 @@ private:
    }
 
    // W != void: histogram w/ weights
+   // the case in which X has to be guessed but W was explicitly specified is not supported
    template <typename X, typename W>
    TActionResultProxy<::TH1F> Histo1DImpl(W *, const BranchNames_t &bl, const std::shared_ptr<::TH1F> &h)
    {

@@ -164,12 +164,13 @@ void TDataFrameImpl::Run()
 #endif // R__USE_IMT
 
    fHasRunAtLeastOnce = true;
-   // forget actions and "detach" the action result pointers marking them ready
-   // and forget them too
+   // forget actions
    fBookedActions.clear();
+   // make all TActionResultProxies ready
    for (auto readiness : fResProxyReadiness) {
       *readiness.get() = true;
    }
+   // forget TActionResultProxies
    fResProxyReadiness.clear();
 }
 

@@ -429,25 +429,16 @@ note that all actions are only executed for events that pass all preceding filte
 | Mean | Return the mean of processed branch values. |
 | Min | Return the minimum of processed branch values. |
 | Profile{1D,2D} | Fill a {one,two}-dimensional profile with the branch values that passed all filters. |
-| Reduce | Reduce (e.g. sum, merge) entries using the function (lambda, functor...) passed as argument. The function
-must have signature `T(T,T)` where `T` is the type of the branch. Return the final result of the reduction operation. An
-optional parameter allows initialization of the result object to non-default values. |
+| Reduce | Reduce (e.g. sum, merge) entries using the function (lambda, functor...) passed as argument. The function must have signature `T(T,T)` where `T` is the type of the branch. Return the final result of the reduction operation. An optional parameter allows initialization of the result object to non-default values. |
 
 | **Instant actions** | **Description** |
 |---------------------|-----------------|
-| Foreach | Execute a user-defined function on each entry. Users are responsible for the thread-safety of this lambda
-when executing with implicit multi-threading enabled. |
-| ForeachSlot | Same as `Foreach`, but the user-defined function must take an extra `unsigned int slot` as its first
-parameter. `slot` will take a different value, `0` to `nThreads - 1`, for each thread of execution. This is meant as a
-helper in writing thread-safe `Foreach` actions when using `TDataFrame` after `ROOT::EnableImplicitMT()`. `ForeachSlot`
-works just as well with single-thread execution: in that case `slot` will always be `0`. |
+| Foreach | Execute a user-defined function on each entry. Users are responsible for the thread-safety of this lambda when executing with implicit multi-threading enabled. |
+| ForeachSlot | Same as `Foreach`, but the user-defined function must take an extra `unsigned int slot` as its first parameter. `slot` will take a different value, `0` to `nThreads - 1`, for each thread of execution. This is meant as a helper in writing thread-safe `Foreach` actions when using `TDataFrame` after `ROOT::EnableImplicitMT()`. `ForeachSlot` works just as well with single-thread execution: in that case `slot` will always be `0`. |
 
 | **Queries** | **Description** |
 |-----------|-----------------|
-| Report | This is not properly an action, since when `Report` is called it does not book an operation to be performed
-on each entry. Instead, it interrogates the data-frame directly to print a cutflow report, i.e. statistics on how many
-entries have been accepted and rejected by the filters. See the section on [named
-filters](#named-filters-and-cutflow-reports) for a more detailed explanation. |
+| Report | This is not properly an action, since when `Report` is called it does not book an operation to be performed on each entry. Instead, it interrogates the data-frame directly to print a cutflow report, i.e. statistics on how many entries have been accepted and rejected by the filters. See the section on [named filters](#named-filters-and-cutflow-reports) for a more detailed explanation. |
 
 ##  <a name="parallel-execution"></a>Parallel execution
 As pointed out before in this document, `TDataFrame` can transparently perform multi-threaded event loops to speed up

@@ -19,6 +19,8 @@
 namespace ROOT {
 
 namespace Internal {
+// Match expression against names of branches passed as parameter
+// Return vector of names of the branches used in the expression
 std::vector<std::string> GetUsedBranchesNames(const std::string expression, TObjArray *branches,
                                               const std::vector<std::string> &tmpBranches)
 {
@@ -46,6 +48,8 @@ std::vector<std::string> GetUsedBranchesNames(const std::string expression, TObj
    return usedBranches;
 }
 
+// Jit a string filter or a string temporary column, call this->AddColumn or this->Filter as needed
+// Return pointer to the new functional chain node returned by the call, cast to Long_t
 Long_t InterpretCall(void *thisPtr, const std::string &methodName, const std::string &nodeTypeName,
                      const std::string &name, const std::string &expression, TObjArray *branches,
                      const std::vector<std::string> &tmpBranches,
@@ -147,6 +151,8 @@ Long_t InterpretCall(void *thisPtr, const std::string &methodName, const std::st
    return retVal;
 }
 
+// Jit and call "this->Action(params...)" for all actions that support branch type inference
+// Return pointer to corresponding TActionResultProxy, cast to Long_t
 Long_t CreateActionGuessed(const BranchNames_t &bl, const std::string &nodeTypename, void *thisPtr,
                            const std::type_info &art, const std::type_info &at, const void *r, TTree *tree,
                            ROOT::Detail::TDataFrameBranchBase *bbase)
@@ -194,7 +200,7 @@ Long_t CreateActionGuessed(const BranchNames_t &bl, const std::string &nodeTypen
    }
    return retVal;
 }
-}
+} // namespace Internal
 
 namespace Experimental {
 

@@ -22,35 +22,37 @@
 
 // A simple helper function to fill a test tree: this makes the example
 // stand-alone.
-void fill_tree(const char* filename, const char* treeName) {
-   TFile f(filename,"RECREATE");
-   TTree t(treeName,treeName);
+void fill_tree(const char *filename, const char *treeName)
+{
+   TFile f(filename, "RECREATE");
+   TTree t(treeName, treeName);
    double b1;
    int b2;
    t.Branch("b1", &b1);
    t.Branch("b2", &b2);
-   for(int i = 0; i < 10; ++i) {
+   for (int i = 0; i < 10; ++i) {
       b1 = i;
-      b2 = i*i;
+      b2 = i * i;
       t.Fill();
    }
    t.Write();
    f.Close();
    return;
 }
 
-int tdf001_introduction() {
+int tdf001_introduction()
+{
 
    // We prepare an input tree to run on
    auto fileName = "tdf001_introduction.root";
    auto treeName = "myTree";
-   fill_tree(fileName,treeName);
+   fill_tree(fileName, treeName);
 
    // We read the tree from the file and create a TDataFrame, a class that
    // allows us to interact with the data contained in the tree.
    // We select a default column, a *branch* to adopt ROOT jargon, which will
    // be looked at if none is specified by the user when dealing with filters
-   //and actions.
+   // and actions.
    ROOT::Experimental::TDataFrame d(treeName, fileName, {"b1"});
 
    // ## Operations on the dataframe
@@ -68,8 +70,8 @@ int tdf001_introduction() {
    // filters. Here we show how the automatic selection of the column kicks
    // in in case the user specifies none.
    auto entries1 = d.Filter(cutb1) // <- no column name specified here!
-                   .Filter(cutb1b2, {"b2","b1"})
-                   .Count();
+                      .Filter(cutb1b2, {"b2", "b1"})
+                      .Count();
 
    std::cout << *entries1 << " entries passed all filters" << std::endl;
 
@@ -82,13 +84,13 @@ int tdf001_introduction() {
    // ### `Min`, `Max` and `Mean` actions
    // These actions allow retrieving statistical information about the entries
    // passing the cuts, if any.
-   auto b1b2_cut = d.Filter(cutb1b2, {"b2","b1"});
+   auto b1b2_cut = d.Filter(cutb1b2, {"b2", "b1"});
    auto minVal = b1b2_cut.Min();
    auto maxVal = b1b2_cut.Max();
    auto meanVal = b1b2_cut.Mean();
    auto nonDefmeanVal = b1b2_cut.Mean("b2"); // <- Column is not the default
-   std::cout << "The mean is always included between the min and the max: "
-             << *minVal << " <= " << *meanVal << " <= " << *maxVal << std::endl;
+   std::cout << "The mean is always included between the min and the max: " << *minVal << " <= " << *meanVal
+             << " <= " << *maxVal << std::endl;
 
    // ### `Take` action
    // The `Take` action allows to retrieve all values of the variable stored in a
@@ -99,8 +101,7 @@ int tdf001_introduction() {
    auto b1Vec = b1_cut.Take<double, std::vector<double>>();
 
    std::cout << "Selected b1 entries" << std::endl;
-   for(auto b1_entry : *b1List)
-      std::cout << b1_entry << " ";
+   for (auto b1_entry : *b1List) std::cout << b1_entry << " ";
    std::cout << std::endl;
    auto b1VecCl = TClass::GetClass(typeid(*b1Vec));
    std::cout << "The type of b1Vec is" << b1VecCl->GetName() << std::endl;
@@ -118,8 +119,7 @@ int tdf001_introduction() {
    // In this case we fill a histogram. In some sense this is a violation of a
    // purely functional paradigm - C++ allows us to do that.
    TH1F h("h", "h", 12, -1, 11);
-   d.Filter([](int b2) { return b2 % 2 == 0; }, {"b2"})
-    .Foreach([&h](double b1) { h.Fill(b1); });
+   d.Filter([](int b2) { return b2 % 2 == 0; }, {"b2"}).Foreach([&h](double b1) { h.Fill(b1); });
 
    std::cout << "Filled h with " << h.GetEntries() << " entries" << std::endl;
 
@@ -131,8 +131,8 @@ int tdf001_introduction() {
    // writing the entire pipeline on one line. This can be easily achieved.
    // We'll show this re-working the `Count` example:
    auto cutb1_result = d.Filter(cutb1);
-   auto cutb1b2_result = d.Filter(cutb1b2, {"b2","b1"});
-   auto cutb1_cutb1b2_result = cutb1_result.Filter(cutb1b2, {"b2","b1"});
+   auto cutb1b2_result = d.Filter(cutb1b2, {"b2", "b1"});
+   auto cutb1_cutb1b2_result = cutb1_result.Filter(cutb1b2, {"b2", "b1"});
    // Now we want to count:
    auto evts_cutb1_result = cutb1_result.Count();
    auto evts_cutb1b2_result = cutb1b2_result.Count();
@@ -156,22 +156,20 @@ int tdf001_introduction() {
    // any value of any type.
    // Let's dive into an example:
    auto entries_sum = d.AddColumn("sum", [](double b1, int b2) { return b2 + b1; }, {"b1", "b2"})
-                       .Filter([](double sum) { return sum > 4.2; }, {"sum"})
-                       .Count();
+                         .Filter([](double sum) { return sum > 4.2; }, {"sum"})
+                         .Count();
    std::cout << *entries_sum << std::endl;
 
    // Additional columns can be expressed as strings. The content must be C++
    // code. The name of the variables must be the name of the branches. The code
    // is just in time compiled.
-   auto entries_sum2 = d.AddColumn("sum", "b1 + b2")
-                        .Filter("sum > 4.2")
-                        .Count();
+   auto entries_sum2 = d.AddColumn("sum", "b1 + b2").Filter("sum > 4.2").Count();
    std::cout << *entries_sum2 << std::endl;
 
    return 0;
 }
 
-int main(){
+int main()
+{
    return tdf001_introduction();
 }
-
@@ -30,29 +30,30 @@ using CylFourVector = ROOT::Math::RhoEtaPhiVector;
 
 // A simple helper function to fill a test tree: this makes the example
 // stand-alone.
-void fill_tree(const char* filename, const char* treeName) {
-   TFile f(filename,"RECREATE");
-   TTree t(treeName,treeName);
+void fill_tree(const char *filename, const char *treeName)
+{
+   TFile f(filename, "RECREATE");
+   TTree t(treeName, treeName);
    FourVectors tracks;
    t.Branch("tracks", &tracks);
 
-   const double M = 0.13957;  // set pi+ mass
+   const double M = 0.13957; // set pi+ mass
    TRandom3 R(1);
 
    for (int i = 0; i < 50; ++i) {
       auto nPart = R.Poisson(15);
       tracks.clear();
       tracks.reserve(nPart);
       for (int j = 0; j < nPart; ++j) {
-         double px = R.Gaus(0,10);
-         double py = R.Gaus(0,10);
-         double pt = sqrt(px*px +py*py);
-         double eta = R.Uniform(-3,3);
-         double phi = R.Uniform(0.0 , 2*TMath::Pi() );
-         CylFourVector vcyl( pt, eta, phi);
+         double px = R.Gaus(0, 10);
+         double py = R.Gaus(0, 10);
+         double pt = sqrt(px * px + py * py);
+         double eta = R.Uniform(-3, 3);
+         double phi = R.Uniform(0.0, 2 * TMath::Pi());
+         CylFourVector vcyl(pt, eta, phi);
          // set energy
-         double E = sqrt( vcyl.R()*vcyl.R() + M*M);
-         FourVector q( vcyl.X(), vcyl.Y(), vcyl.Z(), E);
+         double E = sqrt(vcyl.R() * vcyl.R() + M * M);
+         FourVector q(vcyl.X(), vcyl.Y(), vcyl.Z(), E);
          // fill track vector
          tracks.emplace_back(q);
       }
@@ -64,12 +65,13 @@ void fill_tree(const char* filename, const char* treeName) {
    return;
 }
 
-int tdf002_dataModel() {
+int tdf002_dataModel()
+{
 
    // We prepare an input tree to run on
    auto fileName = "tdf002_dataModel.root";
    auto treeName = "myTree";
-   fill_tree(fileName,treeName);
+   fill_tree(fileName, treeName);
 
    // We read the tree from the file and create a TDataFrame, a class that
    // allows us to interact with the data contained in the tree.
@@ -78,39 +80,36 @@ int tdf002_dataModel() {
    // ## Operating on branches which are collection of objects
    // Here we deal with the simplest of the cuts: we decide to accept the event
    // only if the number of tracks is greater than 8.
-   auto n_cut = [](const FourVectors & tracks) { return tracks.size() > 8; };
-   auto nentries = d.Filter(n_cut, {"tracks"})
-                   .Count();
+   auto n_cut = [](const FourVectors &tracks) { return tracks.size() > 8; };
+   auto nentries = d.Filter(n_cut, {"tracks"}).Count();
 
    std::cout << *nentries << " passed all filters" << std::endl;
 
    // Another possibility consists in creating a new column containing the
    // quantity we are interested in.
    // In this example, we will cut on the number of tracks and plot their
    // transverse momentum.
-   auto getPt = [](const FourVectors& tracks) {
+   auto getPt = [](const FourVectors &tracks) {
       std::vector<double> pts;
       pts.reserve(tracks.size());
-      for (auto& t:tracks)
-         pts.emplace_back(t.Pt());
+      for (auto &t : tracks) pts.emplace_back(t.Pt());
       return pts;
-      };
+   };
 
    // We do the same for the weights.
-   auto getPtWeights = [](const FourVectors& tracks) {
+   auto getPtWeights = [](const FourVectors &tracks) {
       std::vector<double> ptsw;
       ptsw.reserve(tracks.size());
-      for (auto& t:tracks)
-         ptsw.emplace_back(1./t.Pt());
+      for (auto &t : tracks) ptsw.emplace_back(1. / t.Pt());
       return ptsw;
-      };
+   };
 
-   auto augmented_d = d.AddColumn("tracks_n", [](const FourVectors& tracks){return (int)tracks.size();})
-                       .Filter([](int tracks_n){return tracks_n > 2;}, {"tracks_n"})
-                       .AddColumn("tracks_pts", getPt)
-                       .AddColumn("tracks_pts_weights", getPtWeights);
+   auto augmented_d = d.AddColumn("tracks_n", [](const FourVectors &tracks) { return (int)tracks.size(); })
+                         .Filter([](int tracks_n) { return tracks_n > 2; }, {"tracks_n"})
+                         .AddColumn("tracks_pts", getPt)
+                         .AddColumn("tracks_pts_weights", getPtWeights);
 
-   auto trN = augmented_d.Histo1D("tracks_n",40,-.5,39.5);
+   auto trN = augmented_d.Histo1D("tracks_n", 40, -.5, 39.5);
    auto trPts = augmented_d.Histo1D("tracks_pts");
    auto trWPts = augmented_d.Histo1D<std::vector<double>, std::vector<double>>("tracks_pts", "tracks_pts_weights");
 
@@ -129,7 +128,7 @@ int tdf002_dataModel() {
    return 0;
 }
 
-int main(){
+int main()
+{
    return tdf002_dataModel();
 }
-
@@ -19,19 +19,19 @@
 
 // A simple helper function to fill a test tree: this makes the example
 // stand-alone.
-void fill_tree(const char* filename, const char* treeName)
+void fill_tree(const char *filename, const char *treeName)
 {
-   TFile f(filename,"RECREATE");
-   TTree t(treeName,treeName);
+   TFile f(filename, "RECREATE");
+   TTree t(treeName, treeName);
    float px, py, pz;
    t.Branch("px", &px);
    t.Branch("py", &py);
    t.Branch("pz", &pz);
-   for (int i=0; i<25000; i++) {
-      gRandom->Rannor(px,py);
-      pz = px*px + py*py;
+   for (int i = 0; i < 25000; i++) {
+      gRandom->Rannor(px, py);
+      pz = px * px + py * py;
       t.Fill();
-    }
+   }
    t.Write();
    f.Close();
    return;
@@ -42,20 +42,20 @@ void tdf003_profiles()
    // We prepare an input tree to run on
    auto fileName = "tdf003_profiles.root";
    auto treeName = "myTree";
-   fill_tree(fileName,treeName);
+   fill_tree(fileName, treeName);
 
    // We read the tree from the file and create a TDataFrame.
-   ROOT::Experimental::TDataFrame d(treeName, fileName, {"px","py","pz"});
+   ROOT::Experimental::TDataFrame d(treeName, fileName, {"px", "py", "pz"});
 
    // Create the profiles
-   auto hprof1d = d.Profile1D<float, float>(TProfile("hprof1d","Profile of pz versus px",64,-4,4));
-   auto hprof2d = d.Profile2D<float, float, float>(TProfile2D("hprof2d","Profile of pz versus px and py",40,-4,4,40,-4,4,0,20));
+   auto hprof1d = d.Profile1D<float, float>(TProfile("hprof1d", "Profile of pz versus px", 64, -4, 4));
+   auto hprof2d = d.Profile2D<float, float, float>(
+      TProfile2D("hprof2d", "Profile of pz versus px and py", 40, -4, 4, 40, -4, 4, 0, 20));
 
    // And Draw
-   auto c1 = new TCanvas("c1","Profile histogram example",200,10,700,500);
+   auto c1 = new TCanvas("c1", "Profile histogram example", 200, 10, 700, 500);
    hprof1d->DrawClone();
-   auto c2 = new TCanvas("c2","Profile2D histogram example",200,10,700,500);
+   auto c2 = new TCanvas("c2", "Profile2D histogram example", 200, 10, 700, 500);
    c2->cd();
    hprof2d->DrawClone();
-
 }