From 12eb94d907fe5ee7809127662b271bfd8d913968 Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Wed, 5 Jul 2017 23:53:25 +0200 Subject: [PATCH 1/9] [TDF] Rename GetDefault{Branches,ColumnNames}, fDefault{Branches,Columns} --- tree/treeplayer/inc/ROOT/TDFInterface.hxx | 8 ++++---- tree/treeplayer/inc/ROOT/TDFNodes.hxx | 4 ++-- tree/treeplayer/inc/ROOT/TDataFrame.hxx | 2 +- tree/treeplayer/src/TDFNodes.cxx | 7 ++++--- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index 0f0d67c0dd02f..edf7628c664d5 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -201,7 +201,7 @@ public: { TDFInternal::CheckFilter(f); auto df = GetDataFrameChecked(); - const ColumnNames_t &defBl = df->GetDefaultBranches(); + const ColumnNames_t &defBl = df->GetDefaultColumnNames(); auto nArgs = TTraits::CallableTraits::arg_types::list_size; const ColumnNames_t &actualBl = TDFInternal::PickBranchNames(nArgs, bn, defBl); using DFF_t = TDFDetail::TFilter; @@ -280,7 +280,7 @@ public: { auto df = GetDataFrameChecked(); TDFInternal::CheckTmpBranch(name, df->GetTree()); - const ColumnNames_t &defBl = df->GetDefaultBranches(); + const ColumnNames_t &defBl = df->GetDefaultColumnNames(); auto nArgs = TTraits::CallableTraits::arg_types::list_size; const ColumnNames_t &actualBl = TDFInternal::PickBranchNames(nArgs, bl, defBl); using DFB_t = TDFDetail::TCustomColumn; @@ -477,7 +477,7 @@ public: void ForeachSlot(F f, const ColumnNames_t &bl = {}) { auto df = GetDataFrameChecked(); - const ColumnNames_t &defBl = df->GetDefaultBranches(); + const ColumnNames_t &defBl = df->GetDefaultColumnNames(); auto nArgs = TTraits::CallableTraits::arg_types::list_size; const ColumnNames_t &actualBl = TDFInternal::PickBranchNames(nArgs - 1, bl, defBl); using Helper_t = TDFInternal::ForeachSlotHelper; @@ -1134,7 +1134,7 @@ protected: const ColumnNames_t GetDefaultBranchNames(unsigned int nExpectedBranches, std::string_view actionNameForErr) { auto df = GetDataFrameChecked(); - const ColumnNames_t &defaultBranches = df->GetDefaultBranches(); + const ColumnNames_t &defaultBranches = df->GetDefaultColumnNames(); const auto dBSize = defaultBranches.size(); if (nExpectedBranches > dBSize) { std::string msg("Trying to deduce the branches from the default list in order to "); diff --git a/tree/treeplayer/inc/ROOT/TDFNodes.hxx b/tree/treeplayer/inc/ROOT/TDFNodes.hxx index 6397657cd3e5e..5e979222e4297 100644 --- a/tree/treeplayer/inc/ROOT/TDFNodes.hxx +++ b/tree/treeplayer/inc/ROOT/TDFNodes.hxx @@ -61,7 +61,7 @@ class TLoopManager : public std::enable_shared_from_this { std::vector> fResProxyReadiness; ::TDirectory * const fDirPtr{nullptr}; std::shared_ptr fTree{nullptr}; - const ColumnNames_t fDefaultBranches; + const ColumnNames_t fDefaultColumns; const ULong64_t fNEmptyEntries{0}; const unsigned int fNSlots{1}; bool fHasRunAtLeastOnce{false}; @@ -89,7 +89,7 @@ public: void Run(); TLoopManager *GetImplPtr(); std::shared_ptr GetSharedPtr() { return shared_from_this(); } - const ColumnNames_t &GetDefaultBranches() const; + const ColumnNames_t &GetDefaultColumnNames() const; const ColumnNames_t GetTmpBranches() const { return {}; }; TTree *GetTree() const; TCustomColumnBase *GetBookedBranch(const std::string &name) const; diff --git a/tree/treeplayer/inc/ROOT/TDataFrame.hxx b/tree/treeplayer/inc/ROOT/TDataFrame.hxx index 1851fc1210c6e..96c690b9adcb9 100644 --- a/tree/treeplayer/inc/ROOT/TDataFrame.hxx +++ b/tree/treeplayer/inc/ROOT/TDataFrame.hxx @@ -80,7 +80,7 @@ inline std::string printValue(ROOT::Experimental::TDataFrame *tdf) { auto df = tdf->GetDataFrameChecked(); auto *tree = df->GetTree(); - auto defBranches = df->GetDefaultBranches(); + auto defBranches = df->GetDefaultColumnNames(); auto tmpBranches = df->GetTmpBranches(); std::ostringstream ret; diff --git a/tree/treeplayer/src/TDFNodes.cxx b/tree/treeplayer/src/TDFNodes.cxx index de11ec055a896..d8112de0e48c7 100644 --- a/tree/treeplayer/src/TDFNodes.cxx +++ b/tree/treeplayer/src/TDFNodes.cxx @@ -130,7 +130,7 @@ unsigned int TSlotStack::Pop() } TLoopManager::TLoopManager(TTree *tree, const ColumnNames_t &defaultBranches) - : fTree(std::shared_ptr(tree, [](TTree *) {})), fDefaultBranches(defaultBranches), + : fTree(std::shared_ptr(tree, [](TTree *) {})), fDefaultColumns(defaultBranches), fNSlots(TDFInternal::GetNSlots()), fLoopType(ELoopType::kROOTFiles) { } @@ -329,9 +329,10 @@ TLoopManager *TLoopManager::GetImplPtr() return this; } -const ColumnNames_t &TLoopManager::GetDefaultBranches() const +/// Return the list of default columns -- empty if none was provided when constructing the TDataFrame +const ColumnNames_t &TLoopManager::GetDefaultColumnNames() const { - return fDefaultBranches; + return fDefaultColumns; } TTree *TLoopManager::GetTree() const From ead3a7197b39f6b39a34bf758454143b5a8d3a41 Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Wed, 12 Jul 2017 23:28:23 +0200 Subject: [PATCH 2/9] [TDF] Use "column" instead of "branch" in Reduce action --- tree/treeplayer/inc/ROOT/TDFInterface.hxx | 30 +++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index edf7628c664d5..3917282fbed0b 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -487,46 +487,46 @@ public: } //////////////////////////////////////////////////////////////////////////// - /// \brief Execute a user-defined reduce operation on the values of a branch + /// \brief Execute a user-defined reduce operation on the values of a column. /// \tparam F The type of the reduce callable. Automatically deduced. - /// \tparam T The type of the branch to apply the reduction to. Automatically deduced. + /// \tparam T The type of the column to apply the reduction to. Automatically deduced. /// \param[in] f A callable with signature `T(T,T)` - /// \param[in] branchName The branch to be reduced. If omitted, the default branch is used instead. + /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead. /// - /// A reduction takes two values of a branch and merges them into one (e.g. + /// A reduction takes two values of a column and merges them into one (e.g. /// by summing them, taking the maximum, etc). This action performs the - /// specified reduction operation on all branch values, returning + /// specified reduction operation on all processed column values, returning /// a single value of the same type. The callable f must satisfy the general /// requirements of a *processing function* besides having signature `T(T,T)` - /// where `T` is the type of branch. + /// where `T` is the type of column columnName. /// /// This action is *lazy*: upon invocation of this method the calculation is /// booked but not executed. See TResultProxy documentation. template ::ret_type> - TResultProxy Reduce(F f, std::string_view branchName = {}) + TResultProxy Reduce(F f, std::string_view columnName = "") { static_assert(std::is_default_constructible::value, "reduce object cannot be default-constructed. Please provide an initialisation value (initValue)"); - return Reduce(std::move(f), branchName, T()); + return Reduce(std::move(f), columnName, T()); } //////////////////////////////////////////////////////////////////////////// - /// \brief Execute a user-defined reduce operation on the values of a branch + /// \brief Execute a user-defined reduce operation on the values of a column. /// \tparam F The type of the reduce callable. Automatically deduced. - /// \tparam T The type of the branch to apply the reduction to. Automatically deduced. + /// \tparam T The type of the column to apply the reduction to. Automatically deduced. /// \param[in] f A callable with signature `T(T,T)` - /// \param[in] branchName The branch to be reduced. If omitted, the default branch is used instead. - /// \param[in] initValue The reduced object is initialised to this value rather than being default-constructed + /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead. + /// \param[in] initValue The reduced object is initialised to this value rather than being default-constructed. /// - /// See the description of the other Reduce overload for more information. + /// See the description of the first Reduce overload for more information. template ::ret_type> - TResultProxy Reduce(F f, std::string_view branchName, const T &initValue) + TResultProxy Reduce(F f, std::string_view columnName, const T &initValue) { using arg_types = typename TTraits::CallableTraits::arg_types; TDFInternal::CheckReduce(f, arg_types()); auto df = GetDataFrameChecked(); unsigned int nSlots = df->GetNSlots(); - auto bl = GetBranchNames({branchName}, "reduce branch values"); + auto bl = GetBranchNames({columnName}, "reduce branch values"); auto redObjPtr = std::make_shared(initValue); using Helper_t = TDFInternal::ReduceHelper; using Action_t = typename TDFInternal::TAction; From c529f49cd8be4efe266f3c28f8cdecfe6e760c4b Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Thu, 6 Jul 2017 00:08:31 +0200 Subject: [PATCH 3/9] [TDF] Use SelectColumns instead of PickBranchNames SelectColumns chooses between the optional user-provided set of column names and the default set. Throws in case of error. Picks an incremental number of column names from the default set until the required number is reached, instead of requiring that the number of default branches is strictly equal to the number of required branches for the transformation/action. --- tree/treeplayer/inc/ROOT/TDFInterface.hxx | 13 ++++----- tree/treeplayer/inc/ROOT/TDFUtils.hxx | 2 +- tree/treeplayer/src/TDFUtils.cxx | 32 ++++++++++++----------- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index 3917282fbed0b..12777b1038c16 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -203,7 +203,7 @@ public: auto df = GetDataFrameChecked(); const ColumnNames_t &defBl = df->GetDefaultColumnNames(); auto nArgs = TTraits::CallableTraits::arg_types::list_size; - const ColumnNames_t &actualBl = TDFInternal::PickBranchNames(nArgs, bn, defBl); + const auto actualBl = TDFInternal::SelectColumns(nArgs, bn, defBl); using DFF_t = TDFDetail::TFilter; auto FilterPtr = std::make_shared(std::move(f), actualBl, *fProxiedPtr, name); df->Book(FilterPtr); @@ -282,7 +282,7 @@ public: TDFInternal::CheckTmpBranch(name, df->GetTree()); const ColumnNames_t &defBl = df->GetDefaultColumnNames(); auto nArgs = TTraits::CallableTraits::arg_types::list_size; - const ColumnNames_t &actualBl = TDFInternal::PickBranchNames(nArgs, bl, defBl); + const auto actualBl = TDFInternal::SelectColumns(nArgs, bl, defBl); using DFB_t = TDFDetail::TCustomColumn; const std::string nameInt(name); auto BranchPtr = std::make_shared(nameInt, std::move(expression), actualBl, *fProxiedPtr); @@ -479,7 +479,7 @@ public: auto df = GetDataFrameChecked(); const ColumnNames_t &defBl = df->GetDefaultColumnNames(); auto nArgs = TTraits::CallableTraits::arg_types::list_size; - const ColumnNames_t &actualBl = TDFInternal::PickBranchNames(nArgs - 1, bl, defBl); + const auto actualBl = TDFInternal::SelectColumns(nArgs - 1, bl, defBl); using Helper_t = TDFInternal::ForeachSlotHelper; using Action_t = TDFInternal::TAction; df->Book(std::make_shared(Helper_t(std::move(f)), actualBl, *fProxiedPtr)); @@ -525,12 +525,13 @@ public: using arg_types = typename TTraits::CallableTraits::arg_types; TDFInternal::CheckReduce(f, arg_types()); auto df = GetDataFrameChecked(); - unsigned int nSlots = df->GetNSlots(); - auto bl = GetBranchNames({columnName}, "reduce branch values"); + const auto &defBl = df->GetDefaultColumnNames(); + const ColumnNames_t userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); + const auto actualBl = TDFInternal::SelectColumns(1, userColumns, defBl); auto redObjPtr = std::make_shared(initValue); using Helper_t = TDFInternal::ReduceHelper; using Action_t = typename TDFInternal::TAction; - df->Book(std::make_shared(Helper_t(std::move(f), redObjPtr, nSlots), bl, *fProxiedPtr)); + df->Book(std::make_shared(Helper_t(std::move(f), redObjPtr, df->GetNSlots()), actualBl, *fProxiedPtr)); return MakeResultProxy(redObjPtr, df); } diff --git a/tree/treeplayer/inc/ROOT/TDFUtils.hxx b/tree/treeplayer/inc/ROOT/TDFUtils.hxx index ba37441025809..1771440aaa0c7 100644 --- a/tree/treeplayer/inc/ROOT/TDFUtils.hxx +++ b/tree/treeplayer/inc/ROOT/TDFUtils.hxx @@ -182,7 +182,7 @@ void CheckReduce(F &, T) } /// Returns local BranchNames or default BranchNames according to which one should be used -const ColumnNames_t &PickBranchNames(unsigned int nArgs, const ColumnNames_t &bl, const ColumnNames_t &defBl); +const ColumnNames_t SelectColumns(unsigned int nArgs, const ColumnNames_t &bl, const ColumnNames_t &defBl); namespace ActionTypes { struct Histo1D { diff --git a/tree/treeplayer/src/TDFUtils.cxx b/tree/treeplayer/src/TDFUtils.cxx index 800db4fc56f3e..6b42b4cac9734 100644 --- a/tree/treeplayer/src/TDFUtils.cxx +++ b/tree/treeplayer/src/TDFUtils.cxx @@ -143,23 +143,25 @@ void CheckTmpBranch(std::string_view branchName, TTree *treePtr) } } -/// Returns local BranchNames or default BranchNames according to which one should be used -const ColumnNames_t &PickBranchNames(unsigned int nArgs, const ColumnNames_t &bl, const ColumnNames_t &defBl) +/// Choose between local column names or default column names, throw in case of errors. +const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, + const ColumnNames_t &defaultNames) { - bool useDefBl = false; - if (nArgs != bl.size()) { - if (bl.size() == 0 && nArgs == defBl.size()) { - useDefBl = true; - } else { - auto msg = "mismatch between number of filter/define arguments (" + std::to_string(nArgs) + - ") and number of columns specified (" + std::to_string(bl.size() ? bl.size() : defBl.size()) + - "). Please check the number of arguments of the function/lambda/functor and the number of branches " - "specified."; - throw std::runtime_error(msg); - } + if (names.empty()) { + // use default column names + if (defaultNames.size() < nRequiredNames) + throw std::runtime_error(std::to_string(nRequiredNames) + + " column names are required but none were provided and the default list has size " + + std::to_string(defaultNames.size())); + // return first nRequiredNames default column names + return ColumnNames_t(defaultNames.begin(), defaultNames.begin() + nRequiredNames); + } else { + // use column names provided by the user to this particular transformation/action + if (names.size() != nRequiredNames) + throw std::runtime_error(std::to_string(nRequiredNames) + " column names are required but " + + std::to_string(names.size()) + " were provided."); + return names; } - - return useDefBl ? defBl : bl; } } // end NS TDF From 301ee7f02f2384347241507ad412c2f1de8b6bc1 Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Thu, 13 Jul 2017 00:56:10 +0200 Subject: [PATCH 4/9] [TDF] Use SelectColumns instead of GetBranchNames in Take --- tree/treeplayer/inc/ROOT/TDFInterface.hxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index 12777b1038c16..90568d83c6590 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -578,7 +578,9 @@ public: { auto df = GetDataFrameChecked(); unsigned int nSlots = df->GetNSlots(); - auto bl = GetBranchNames({branchName}, "get the values of the branch"); + const ColumnNames_t &defBl = df->GetDefaultColumnNames(); + const ColumnNames_t userColumns = branchName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(branchName)}); + const auto bl = TDFInternal::SelectColumns(1, userColumns, defBl); auto valuesPtr = std::make_shared(); using Helper_t = TDFInternal::TakeHelper; using Action_t = TDFInternal::TAction; From 0c52782d7f68df87002032c46c965ef8dd141957 Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Thu, 13 Jul 2017 02:03:10 +0200 Subject: [PATCH 5/9] [TDF] Use SelectColumn instead of GetBranchNames in Histo and Fill actions --- tree/treeplayer/inc/ROOT/TDFInterface.hxx | 104 ++++++++++++++++------ tree/treeplayer/src/TDFInterface.cxx | 9 ++ tree/treeplayer/src/TDFUtils.cxx | 1 + 3 files changed, 85 insertions(+), 29 deletions(-) diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index 90568d83c6590..f9b8233d36bd8 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -138,6 +138,8 @@ std::shared_ptr *MakeSharedOnHeap(const std::shared_ptr &shPtr) return new std::shared_ptr(shPtr); } +bool AtLeastOneEmptyString(const std::vector strings); + } // namespace TDF } // namespace Internal @@ -605,11 +607,11 @@ public: template TResultProxy<::TH1D> Histo1D(::TH1D &&model = ::TH1D{"", "", 128u, 0., 0.}, std::string_view vName = "") { - auto bl = GetBranchNames({vName}, "fill the histogram"); + const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)}); auto h = std::make_shared<::TH1D>(std::move(model)); if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin()) TDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h); - return CreateAction(bl, h); + return CreateAction(userColumns, h); } template @@ -636,9 +638,12 @@ public: template TResultProxy<::TH1D> Histo1D(::TH1D &&model, std::string_view vName, std::string_view wName) { - auto bl = GetBranchNames({vName, wName}, "fill the histogram"); + auto columnViews = { vName, wName }; + const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews) + ? ColumnNames_t() + : ColumnNames_t(columnViews.begin(), columnViews.end()); auto h = std::make_shared<::TH1D>(std::move(model)); - return CreateAction(bl, h); + return CreateAction(userColumns, h); } template @@ -671,8 +676,11 @@ public: if (!TDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) { throw std::runtime_error("2D histograms with no axes limits are not supported yet."); } - auto bl = GetBranchNames({v1Name, v2Name}, "fill the histogram"); - return CreateAction(bl, h); + auto columnViews = {v1Name, v2Name}; + const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews) + ? ColumnNames_t() + : ColumnNames_t(columnViews.begin(), columnViews.end()); + return CreateAction(userColumns, h); } //////////////////////////////////////////////////////////////////////////// @@ -697,8 +705,11 @@ public: if (!TDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) { throw std::runtime_error("2D histograms with no axes limits are not supported yet."); } - auto bl = GetBranchNames({v1Name, v2Name, wName}, "fill the histogram"); - return CreateAction(bl, h); + auto columnViews = {v1Name, v2Name, wName}; + const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews) + ? ColumnNames_t() + : ColumnNames_t(columnViews.begin(), columnViews.end()); + return CreateAction(userColumns, h); } template @@ -729,8 +740,11 @@ public: if (!TDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) { throw std::runtime_error("3D histograms with no axes limits are not supported yet."); } - auto bl = GetBranchNames({v1Name, v2Name, v3Name}, "fill the histogram"); - return CreateAction(bl, h); + auto columnViews = {v1Name, v2Name, v3Name}; + const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews) + ? ColumnNames_t() + : ColumnNames_t(columnViews.begin(), columnViews.end()); + return CreateAction(userColumns, h); } //////////////////////////////////////////////////////////////////////////// @@ -757,8 +771,11 @@ public: if (!TDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) { throw std::runtime_error("3D histograms with no axes limits are not supported yet."); } - auto bl = GetBranchNames({v1Name, v2Name, v3Name, wName}, "fill the histogram"); - return CreateAction(bl, h); + auto columnViews = {v1Name, v2Name, v3Name, wName}; + const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews) + ? ColumnNames_t() + : ColumnNames_t(columnViews.begin(), columnViews.end()); + return CreateAction(userColumns, h); } template @@ -785,8 +802,11 @@ public: if (!TDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) { throw std::runtime_error("Profiles with no axes limits are not supported yet."); } - auto bl = GetBranchNames({v1Name, v2Name}, "fill the 1D Profile"); - return CreateAction(bl, h); + auto columnViews = {v1Name, v2Name}; + const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews) + ? ColumnNames_t() + : ColumnNames_t(columnViews.begin(), columnViews.end()); + return CreateAction(userColumns, h); } //////////////////////////////////////////////////////////////////////////// @@ -811,8 +831,11 @@ public: if (!TDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) { throw std::runtime_error("Profile histograms with no axes limits are not supported yet."); } - auto bl = GetBranchNames({v1Name, v2Name, wName}, "fill the 1D profile"); - return CreateAction(bl, h); + auto columnViews = {v1Name, v2Name, wName}; + const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews) + ? ColumnNames_t() + : ColumnNames_t(columnViews.begin(), columnViews.end()); + return CreateAction(userColumns, h); } template @@ -843,8 +866,11 @@ public: if (!TDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { throw std::runtime_error("2D profiles with no axes limits are not supported yet."); } - auto bl = GetBranchNames({v1Name, v2Name, v3Name}, "fill the 2D profile"); - return CreateAction(bl, h); + auto columnViews = {v1Name, v2Name, v3Name}; + const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews) + ? ColumnNames_t() + : ColumnNames_t(columnViews.begin(), columnViews.end()); + return CreateAction(userColumns, h); } //////////////////////////////////////////////////////////////////////////// @@ -871,8 +897,11 @@ public: if (!TDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { throw std::runtime_error("2D profiles with no axes limits are not supported yet."); } - auto bl = GetBranchNames({v1Name, v2Name, v3Name, wName}, "fill the histogram"); - return CreateAction(bl, h); + auto columnViews = {v1Name, v2Name, v3Name, wName}; + const auto userColumns = TDFInternal::AtLeastOneEmptyString(columnViews) + ? ColumnNames_t() + : ColumnNames_t(columnViews.begin(), columnViews.end()); + return CreateAction(userColumns, h); } template @@ -883,15 +912,16 @@ public: //////////////////////////////////////////////////////////////////////////// /// \brief Fill and return any entity with a Fill method (*lazy action*) - /// \tparam BranchTypes The types of the branches the values of which are used to fill the object. + /// \tparam FirstBranch The first type of the branches the values of which are used to fill the object. + /// \tparam OtherBranches A list of the other types of the branches the values of which are used to fill the object. + /// \tparam T The type of the object to fill. Automatically deduced. /// \param[in] model The model to be considered to build the new return value. /// \param[in] bl The name of the branches read to fill the object. /// - /// The returned object is independent of the input one. - /// This action is *lazy*: upon invocation of this method the calculation is - /// booked but not executed. See TResultProxy documentation. /// The user gives up ownership of the model object. - /// It is compulsory to express the branches to be considered. + /// The list of column names to be used for filling must always be specified. + /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. + /// See TResultProxy documentation. template // need FirstBranch to disambiguate overloads TResultProxy Fill(T &&model, const ColumnNames_t &bl) { @@ -902,6 +932,14 @@ public: return CreateAction(bl, h); } + //////////////////////////////////////////////////////////////////////////// + /// \brief Fill and return any entity with a Fill method + /// \tparam T The type of the object to fill. Automatically deduced. + /// \param[in] model The model to be considered to build the new return value. + /// \param[in] bl The name of the branches read to fill the object. + /// + /// This overload of `Fill` infers the type of the specified columns at runtime and just-in-time compiles the + /// previous overload. Check the previous overload for more details on `Fill`. template TResultProxy Fill(T &&model, const ColumnNames_t &bl) { @@ -909,7 +947,7 @@ public: if (!TDFInternal::HistoUtils::HasAxisLimits(*h)) { throw std::runtime_error("The absence of axes limits is not supported yet."); } - return CreateAction(bl, h); + return CreateAction(bl, h, bl.size()); } //////////////////////////////////////////////////////////////////////////// @@ -1026,22 +1064,30 @@ private: TResultProxy CreateAction(const ColumnNames_t &bl, const std::shared_ptr &r) { auto df = GetDataFrameChecked(); + const ColumnNames_t &defBl = df->GetDefaultColumnNames(); + auto nColumns = sizeof...(BranchTypes); + const auto actualBl = TDFInternal::SelectColumns(nColumns, bl, defBl); unsigned int nSlots = df->GetNSlots(); - TDFInternal::BuildAndBook(bl, r, nSlots, *df, *fProxiedPtr, (ActionType *)nullptr); + TDFInternal::BuildAndBook(actualBl, r, nSlots, *df, *fProxiedPtr, (ActionType *)nullptr); return MakeResultProxy(r, df); } // User did not specify type, do type inference + // This version of CreateAction has a `nColumns` optional argument. If present, the number of required columns for + // this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(BranchTypes) template ::value, int>::type = 0> - TResultProxy CreateAction(const ColumnNames_t &bl, const std::shared_ptr &r) + TResultProxy CreateAction(const ColumnNames_t &bl, const std::shared_ptr &r, + const int nColumns = -1) { auto df = GetDataFrameChecked(); + const ColumnNames_t &defBl = df->GetDefaultColumnNames(); + const auto actualBl = TDFInternal::SelectColumns((nColumns > -1 ? nColumns : sizeof...(BranchTypes)), bl, defBl); unsigned int nSlots = df->GetNSlots(); const auto &tmpBranches = df->GetBookedBranches(); auto tree = df->GetTree(); auto rOnHeap = TDFInternal::MakeSharedOnHeap(r); - auto toJit = TDFInternal::JitBuildAndBook(bl, GetNodeTypeName(), fProxiedPtr.get(), + auto toJit = TDFInternal::JitBuildAndBook(actualBl, GetNodeTypeName(), fProxiedPtr.get(), typeid(std::shared_ptr), typeid(ActionType), rOnHeap, tree, nSlots, tmpBranches); df->Jit(toJit); diff --git a/tree/treeplayer/src/TDFInterface.cxx b/tree/treeplayer/src/TDFInterface.cxx index a51515efa73be..5541b21fa7105 100644 --- a/tree/treeplayer/src/TDFInterface.cxx +++ b/tree/treeplayer/src/TDFInterface.cxx @@ -221,6 +221,15 @@ std::string JitBuildAndBook(const ColumnNames_t &bl, const std::string &prevNode createAction_str << "}, " << nSlots << ", reinterpret_cast<" << actionResultTypeName << "*>(" << rOnHeap << "));"; return createAction_str.str(); } + +bool AtLeastOneEmptyString(const std::vector strings) +{ + for(const auto &s : strings) { + if (s.empty()) + return true; + } + return false; +} } // end ns TDF } // end ns Internal } // end ns ROOT diff --git a/tree/treeplayer/src/TDFUtils.cxx b/tree/treeplayer/src/TDFUtils.cxx index 6b42b4cac9734..f50839d4eef8d 100644 --- a/tree/treeplayer/src/TDFUtils.cxx +++ b/tree/treeplayer/src/TDFUtils.cxx @@ -147,6 +147,7 @@ void CheckTmpBranch(std::string_view branchName, TTree *treePtr) const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames) { + // TODO fix grammar in case nRequiredNames == 1 or names.size() == 1 if (names.empty()) { // use default column names if (defaultNames.size() < nRequiredNames) From c1adef50fd67daabf176dd842d4e8fcfe8ef500f Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Thu, 13 Jul 2017 18:53:09 +0200 Subject: [PATCH 6/9] [TDF] Use SelectColumns instead of GetBranchNames in Min,Max,Mean --- tree/treeplayer/inc/ROOT/TDFInterface.hxx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index f9b8233d36bd8..4488aea597227 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -962,9 +962,9 @@ public: template TResultProxy Min(std::string_view branchName = "") { - auto bl = GetBranchNames({branchName}, "calculate the minimum"); + const auto userColumns = branchName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(branchName)}); auto minV = std::make_shared(std::numeric_limits::max()); - return CreateAction(bl, minV); + return CreateAction(userColumns, minV); } //////////////////////////////////////////////////////////////////////////// @@ -979,9 +979,9 @@ public: template TResultProxy Max(std::string_view branchName = "") { - auto bl = GetBranchNames({branchName}, "calculate the maximum"); + const auto userColumns = branchName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(branchName)}); auto maxV = std::make_shared(std::numeric_limits::min()); - return CreateAction(bl, maxV); + return CreateAction(userColumns, maxV); } //////////////////////////////////////////////////////////////////////////// @@ -996,9 +996,9 @@ public: template TResultProxy Mean(std::string_view branchName = "") { - auto bl = GetBranchNames({branchName}, "calculate the mean"); + const auto userColumns = branchName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(branchName)}); auto meanV = std::make_shared(0); - return CreateAction(bl, meanV); + return CreateAction(userColumns, meanV); } //////////////////////////////////////////////////////////////////////////// From 6baafe4a1a01f230439b6491fd9f3df3d59e4845 Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Thu, 13 Jul 2017 18:55:17 +0200 Subject: [PATCH 7/9] [TDF] Remove the GetBranchNames function --- tree/treeplayer/inc/ROOT/TDFInterface.hxx | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index 4488aea597227..8b0f9ca523faa 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -1035,29 +1035,6 @@ private: inline std::string GetNodeTypeName(); - /// Returns the default branches if needed, takes care of the error handling. - template - ColumnNames_t GetBranchNames(const std::vector &bl, std::string_view actionNameForErr) - { - constexpr auto isT2Void = std::is_same::value; - constexpr auto isT3Void = std::is_same::value; - constexpr auto isT4Void = std::is_same::value; - - unsigned int neededBranches = 1 + !isT2Void + !isT3Void + !isT4Void; - - unsigned int providedBranches = 0; - std::for_each(bl.begin(), bl.end(), [&providedBranches](std::string_view s) { - if (!s.empty()) providedBranches++; - }); - - if (neededBranches == providedBranches) { - ColumnNames_t bl2(bl.begin(), bl.end()); - return bl2; - } - - return GetDefaultBranchNames(neededBranches, actionNameForErr); - } - // Type was specified by the user, no need to infer it template ::value, int>::type = 0> From 41b8c8db0c166c4b4e3063f8d60532df61b92948 Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Thu, 13 Jul 2017 19:11:44 +0200 Subject: [PATCH 8/9] [TDF] Delete GetDefaultBranchNames Now unused --- tree/treeplayer/inc/ROOT/TDFInterface.hxx | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index 8b0f9ca523faa..ae7e8efae453b 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -1157,26 +1157,6 @@ protected: return df; } - const ColumnNames_t GetDefaultBranchNames(unsigned int nExpectedBranches, std::string_view actionNameForErr) - { - auto df = GetDataFrameChecked(); - const ColumnNames_t &defaultBranches = df->GetDefaultColumnNames(); - const auto dBSize = defaultBranches.size(); - if (nExpectedBranches > dBSize) { - std::string msg("Trying to deduce the branches from the default list in order to "); - msg += actionNameForErr; - msg += ". A set of branches of size "; - msg += std::to_string(dBSize); - msg += " was found. "; - msg += std::to_string(nExpectedBranches); - msg += 1 != nExpectedBranches ? " are" : " is"; - msg += " needed. Please specify the branches explicitly."; - throw std::runtime_error(msg); - } - auto bnBegin = defaultBranches.begin(); - return ColumnNames_t(bnBegin, bnBegin + nExpectedBranches); - } - TInterface(const std::shared_ptr &proxied, const std::weak_ptr &impl) : fProxiedPtr(proxied), fImplWeakPtr(impl) { From e22b85c71fb648652161d9d98188787009fec3ee Mon Sep 17 00:00:00 2001 From: Enrico Guiraud Date: Thu, 13 Jul 2017 19:14:24 +0200 Subject: [PATCH 9/9] [TDF] Rename GetUsedBranchNames -> FindUsedColumnNames. Complete ROOT-8879. --- tree/treeplayer/inc/ROOT/TDFInterface.hxx | 2 +- tree/treeplayer/src/TDFInterface.cxx | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index ae7e8efae453b..48a5d2f12276c 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -113,7 +113,7 @@ void CallBuildAndBook(PrevNodeType &prevNode, const ColumnNames_t &bl, unsigned delete rOnHeap; } -std::vector GetUsedBranchesNames(const std::string, TObjArray *, const std::vector &); +std::vector FindUsedColumnNames(const std::string, TObjArray *, const std::vector &); using TmpBranchBasePtr_t = std::shared_ptr; diff --git a/tree/treeplayer/src/TDFInterface.cxx b/tree/treeplayer/src/TDFInterface.cxx index 5541b21fa7105..be0df9223875c 100644 --- a/tree/treeplayer/src/TDFInterface.cxx +++ b/tree/treeplayer/src/TDFInterface.cxx @@ -33,8 +33,8 @@ namespace Internal { namespace TDF { // Match expression against names of branches passed as parameter // Return vector of names of the branches used in the expression -std::vector GetUsedBranchesNames(const std::string expression, TObjArray *branches, - const std::vector &tmpBranches) +std::vector FindUsedColumnNames(const std::string expression, TObjArray *branches, + const std::vector &tmpBranches) { // Check what branches and temporary branches are used in the expression // To help matching the regex @@ -68,7 +68,7 @@ Long_t JitTransformation(void *thisPtr, const std::string &methodName, const std const std::vector &tmpBranches, const std::map &tmpBookedBranches, TTree *tree) { - auto usedBranches = GetUsedBranchesNames(expression, branches, tmpBranches); + auto usedBranches = FindUsedColumnNames(expression, branches, tmpBranches); auto exprNeedsVariables = !usedBranches.empty(); // Move to the preparation of the jitting