From af8fde060ddb4c0ae3dfb9b4a5e8943ac8027d7c Mon Sep 17 00:00:00 2001 From: Danilo Piparo Date: Thu, 28 Sep 2017 15:45:57 +0200 Subject: [PATCH 1/5] [TDF] Add the GetColumnNames method to the TInterface to give the possibility to the user to get the column names from the TDataFrame nodes. --- tree/treeplayer/inc/ROOT/TDFInterface.hxx | 30 +++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index 719e99fa4af10..320828c85c00f 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -1325,6 +1325,36 @@ public: fProxiedPtr->Report(); } + ///////////////////////////////////////////////////////////////////////////// + /// \brief Returns the names of the available columns + /// + /// This is not an action nor a transformation, just a simple utility to + /// get columns names out of the TDataFrame nodes. + ColumnNames_t GetColumnNames() + { + ColumnNames_t allColumns; + + auto addIfNotInternal = [&allColumns](std::string_view colName){ if (!TDFInternal::IsInternalColumn(colName)) allColumns.emplace_back(colName);}; + + std::for_each(fValidCustomColumns.begin(), fValidCustomColumns.end(), addIfNotInternal); + + auto df = GetDataFrameChecked(); + auto tree = df->GetTree(); + if (tree) { + const auto branches = tree->GetListOfBranches(); + for (auto branch : *branches) { + allColumns.emplace_back(branch->GetName()); + } + } + + if (fDataSource) { + auto &dsColNames = fDataSource->GetColumnNames(); + allColumns.insert(allColumns.end(), dsColNames.begin(), dsColNames.end()); + } + + return allColumns; + } + private: ColumnNames_t ConvertRegexToColumns(std::string_view columnNameRegexp) { From 977aa43f861ca0d92a26bbcaab0f9eb8fc81855d Mon Sep 17 00:00:00 2001 From: Danilo Piparo Date: Fri, 29 Sep 2017 06:48:19 +0200 Subject: [PATCH 2/5] [TDF] Test the GetColumnNames method of TDF::TInterface --- 
.../test/dataframe/dataframe_interface.cxx | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tree/treeplayer/test/dataframe/dataframe_interface.cxx b/tree/treeplayer/test/dataframe/dataframe_interface.cxx index 7e804dbab310e..9437874eb9b91 100644 --- a/tree/treeplayer/test/dataframe/dataframe_interface.cxx +++ b/tree/treeplayer/test/dataframe/dataframe_interface.cxx @@ -1,10 +1,12 @@ #include "ROOT/TDataFrame.hxx" +#include "ROOT/TTrivialDS.hxx" #include "TMemFile.h" #include "TTree.h" #include "gtest/gtest.h" using namespace ROOT::Experimental; +using namespace ROOT::Experimental::TDF; TEST(TDataFrameInterface, CreateFromNullTDirectory) { @@ -89,3 +91,38 @@ TEST(TDataFrameInterface, CheckAliasesPerChain) } EXPECT_EQ(0, ret) << "No exception thrown when trying to alias a non-existing column."; } + +TEST(TDataFrameInterface, GetColumnNamesFromScratch) +{ + TDataFrame f(1); + auto dummyGen = []() { return 1; }; + auto names = f.Define("a", dummyGen).Define("b", dummyGen).Define("__TDF_Dummy", dummyGen).GetColumnNames(); + EXPECT_STREQ("a", names[0].c_str()); + EXPECT_STREQ("b", names[1].c_str()); + EXPECT_EQ(2U, names.size()); +} + +TEST(TDataFrameInterface, GetColumnNamesFromTree) +{ + TTree t("t","t"); + int a,b; + t.Branch("a",&a); + t.Branch("b",&b); + TDataFrame tdf(t); + auto names = tdf.GetColumnNames(); + EXPECT_STREQ("a", names[0].c_str()); + EXPECT_STREQ("b", names[1].c_str()); + EXPECT_EQ(2U, names.size()); +} + +TEST(TDataFrameInterface, GetColumnNamesFromSource) +{ + std::unique_ptr<TDataSource> tds(new TTrivialDS(1)); + TDataFrame tdf(std::move(tds)); + auto names = tdf.Define("b", []() { return 1; }).GetColumnNames(); + EXPECT_STREQ("b", names[0].c_str()); + EXPECT_STREQ("col0", names[1].c_str()); + EXPECT_EQ(2U, names.size()); +} + + From c248a5fa1fa3cef27c899b4d685dfe9905e1f0d5 Mon Sep 17 00:00:00 2001 From: Danilo Piparo Date: Fri, 29 Sep 2017 06:48:39 +0200 Subject: [PATCH 3/5] Update Release notes --- README/ReleaseNotes/v612/index.md | 3 
++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README/ReleaseNotes/v612/index.md b/README/ReleaseNotes/v612/index.md index 71b8d6f106211..10c17802ed954 100644 --- a/README/ReleaseNotes/v612/index.md +++ b/README/ReleaseNotes/v612/index.md @@ -98,7 +98,8 @@ large TClonesArray where each element contains another small vector container. - Users can now configure Snapshot to use different file open modes ("RECREATE" or "UPDATE"), compression level, compression algorithm, TTree split-level and autoflush settings - Python tutorials show the new "tuple-initialisation" feature of PyROOT (see below) - The possibility to read from data sources was added. An interface for all data sources, TDataSource, is provided by ROOT. Two example data sources have been provided too: the TRootDS and the TTrivialDS. The former allows to read via the novel data source mechanism ROOT data, while the latter is a simple generator, created for testing and didactic purposes. It is therefore now possible to interface *any* kind of dataset/data format to ROOT as long as an adaptor which implements the pure virtual methods of the TDataSource interface can be written in C++. 
- - Column can be aliased with the TInterface method Alias: `auto histo = mytdf.Alias("myAlias", "myColumn").Histo1D("myAlias");` + - Columns can be aliased with the `TDF::TInterface` method `Alias`: `auto histo = mytdf.Alias("myAlias", "myColumn").Histo1D("myAlias");` + - Add the `GetColumnNames` method to the `TDF::TInterface`: the user can therefore get the names of the available columns coming from trees, data sources or `Define`d columns ## Histogram Libraries From ca17b773c02078e7960519c1c66b73ec6476de43 Mon Sep 17 00:00:00 2001 From: Danilo Piparo Date: Fri, 29 Sep 2017 06:50:00 +0200 Subject: [PATCH 4/5] Formatting --- tree/treeplayer/inc/ROOT/TDFInterface.hxx | 8 ++++++-- tree/treeplayer/test/dataframe/dataframe_interface.cxx | 10 ++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index 320828c85c00f..193f463cbc78c 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -1334,7 +1334,10 @@ public: { ColumnNames_t allColumns; - auto addIfNotInternal = [&allColumns](std::string_view colName){ if (!TDFInternal::IsInternalColumn(colName)) allColumns.emplace_back(colName);}; + auto addIfNotInternal = [&allColumns](std::string_view colName) { + if (!TDFInternal::IsInternalColumn(colName)) + allColumns.emplace_back(colName); + }; std::for_each(fValidCustomColumns.begin(), fValidCustomColumns.end(), addIfNotInternal); @@ -1378,7 +1381,8 @@ private: TRegexp regexp(theRegex); int dummy; for (auto &&branchName : customColumns) { - if ((isEmptyRegex || -1 != regexp.Index(branchName.c_str(), &dummy)) && !TDFInternal::IsInternalColumn(branchName)) { + if ((isEmptyRegex || -1 != regexp.Index(branchName.c_str(), &dummy)) && + !TDFInternal::IsInternalColumn(branchName)) { selectedColumns.emplace_back(branchName); } } diff --git a/tree/treeplayer/test/dataframe/dataframe_interface.cxx 
b/tree/treeplayer/test/dataframe/dataframe_interface.cxx index 9437874eb9b91..fb2bf5d58a0e8 100644 --- a/tree/treeplayer/test/dataframe/dataframe_interface.cxx +++ b/tree/treeplayer/test/dataframe/dataframe_interface.cxx @@ -104,10 +104,10 @@ TEST(TDataFrameInterface, GetColumnNamesFromScratch) TEST(TDataFrameInterface, GetColumnNamesFromTree) { - TTree t("t","t"); - int a,b; - t.Branch("a",&a); - t.Branch("b",&b); + TTree t("t", "t"); + int a, b; + t.Branch("a", &a); + t.Branch("b", &b); TDataFrame tdf(t); auto names = tdf.GetColumnNames(); EXPECT_STREQ("a", names[0].c_str()); @@ -124,5 +124,3 @@ TEST(TDataFrameInterface, GetColumnNamesFromSource) EXPECT_STREQ("col0", names[1].c_str()); EXPECT_EQ(2U, names.size()); } - - From 1c56dbc211f7d3b560255ab61c9839f10a7b628e Mon Sep 17 00:00:00 2001 From: Danilo Piparo Date: Fri, 29 Sep 2017 06:52:18 +0200 Subject: [PATCH 5/5] [TDF] Avoid that clang-format formats doxy for a method --- tree/treeplayer/inc/ROOT/TDFInterface.hxx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index 193f463cbc78c..914cbebf7f8f4 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -556,6 +556,7 @@ public: return *reinterpret_cast *>(newTDFPtr); } + // clang-format off //////////////////////////////////////////////////////////////////////////// /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`. /// \param[in] treename The name of the output TTree @@ -572,6 +573,7 @@ public: auto selectedColumns = ConvertRegexToColumns(columnNameRegexp); return Snapshot(treename, filename, selectedColumns, options); } + // clang-format on //////////////////////////////////////////////////////////////////////////// /// \brief Save selected columns in memory