diff --git a/README/ReleaseNotes/v612/index.md b/README/ReleaseNotes/v612/index.md index 71b8d6f106211..10c17802ed954 100644 --- a/README/ReleaseNotes/v612/index.md +++ b/README/ReleaseNotes/v612/index.md @@ -98,7 +98,8 @@ large TClonesArray where each element contains another small vector container. - Users can now configure Snapshot to use different file open modes ("RECREATE" or "UPDATE"), compression level, compression algorithm, TTree split-level and autoflush settings - Python tutorials show the new "tuple-initialisation" feature of PyROOT (see below) - The possibility to read from data sources was added. An interface for all data sources, TDataSource, is provided by ROOT. Two example data sources have been provided too: the TRootDS and the TTrivialDS. The former allows to read via the novel data source mechanism ROOT data, while the latter is a simple generator, created for testing and didactic purposes. It is therefore now possible to interface *any* kind of dataset/data format to ROOT as long as an adaptor which implements the pure virtual methods of the TDataSource interface can be written in C++. 
- - Column can be aliased with the TInterface method Alias: `auto histo = mytdf.Alias("myAlias", "myColumn").Histo1D("myAlias");` + - Columns can be aliased with the `TDF::TInterface` method `Alias`: `auto histo = mytdf.Alias("myAlias", "myColumn").Histo1D("myAlias");` + - Add the `GetColumnNames` method to the `TDF::TInterface`: the user can therefore get the names of the available columns coming from trees, data sources or `Define`d columns ## Histogram Libraries diff --git a/tree/treeplayer/inc/ROOT/TDFInterface.hxx b/tree/treeplayer/inc/ROOT/TDFInterface.hxx index 719e99fa4af10..914cbebf7f8f4 100644 --- a/tree/treeplayer/inc/ROOT/TDFInterface.hxx +++ b/tree/treeplayer/inc/ROOT/TDFInterface.hxx @@ -556,6 +556,7 @@ public: return *reinterpret_cast *>(newTDFPtr); } + // clang-format off //////////////////////////////////////////////////////////////////////////// /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`. /// \param[in] treename The name of the output TTree @@ -572,6 +573,7 @@ public: auto selectedColumns = ConvertRegexToColumns(columnNameRegexp); return Snapshot(treename, filename, selectedColumns, options); } + // clang-format on //////////////////////////////////////////////////////////////////////////// /// \brief Save selected columns in memory @@ -1325,6 +1327,39 @@ public: fProxiedPtr->Report(); } + ///////////////////////////////////////////////////////////////////////////// + /// \brief Returns the names of the available columns + /// + /// This is not an action nor a transformation, just a simple utility to + /// get column names out of the TDataFrame nodes. 
+ ColumnNames_t GetColumnNames() + { + ColumnNames_t allColumns; + + auto addIfNotInternal = [&allColumns](std::string_view colName) { + if (!TDFInternal::IsInternalColumn(colName)) + allColumns.emplace_back(colName); + }; + + std::for_each(fValidCustomColumns.begin(), fValidCustomColumns.end(), addIfNotInternal); + + auto df = GetDataFrameChecked(); + auto tree = df->GetTree(); + if (tree) { + const auto branches = tree->GetListOfBranches(); + for (auto branch : *branches) { + allColumns.emplace_back(branch->GetName()); + } + } + + if (fDataSource) { + auto &dsColNames = fDataSource->GetColumnNames(); + allColumns.insert(allColumns.end(), dsColNames.begin(), dsColNames.end()); + } + + return allColumns; + } + private: ColumnNames_t ConvertRegexToColumns(std::string_view columnNameRegexp) { @@ -1348,7 +1383,8 @@ private: TRegexp regexp(theRegex); int dummy; for (auto &&branchName : customColumns) { - if ((isEmptyRegex || -1 != regexp.Index(branchName.c_str(), &dummy)) && !TDFInternal::IsInternalColumn(branchName)) { + if ((isEmptyRegex || -1 != regexp.Index(branchName.c_str(), &dummy)) && + !TDFInternal::IsInternalColumn(branchName)) { selectedColumns.emplace_back(branchName); } } diff --git a/tree/treeplayer/test/dataframe/dataframe_interface.cxx b/tree/treeplayer/test/dataframe/dataframe_interface.cxx index 7e804dbab310e..fb2bf5d58a0e8 100644 --- a/tree/treeplayer/test/dataframe/dataframe_interface.cxx +++ b/tree/treeplayer/test/dataframe/dataframe_interface.cxx @@ -1,10 +1,12 @@ #include "ROOT/TDataFrame.hxx" +#include "ROOT/TTrivialDS.hxx" #include "TMemFile.h" #include "TTree.h" #include "gtest/gtest.h" using namespace ROOT::Experimental; +using namespace ROOT::Experimental::TDF; TEST(TDataFrameInterface, CreateFromNullTDirectory) { @@ -89,3 +91,36 @@ TEST(TDataFrameInterface, CheckAliasesPerChain) } EXPECT_EQ(0, ret) << "No exception thrown when trying to alias a non-existing column."; } + +TEST(TDataFrameInterface, GetColumnNamesFromScratch) +{ + TDataFrame 
f(1); + auto dummyGen = []() { return 1; }; + auto names = f.Define("a", dummyGen).Define("b", dummyGen).Define("__TDF_Dummy", dummyGen).GetColumnNames(); + EXPECT_STREQ("a", names[0].c_str()); + EXPECT_STREQ("b", names[1].c_str()); + EXPECT_EQ(2U, names.size()); +} + +TEST(TDataFrameInterface, GetColumnNamesFromTree) +{ + TTree t("t", "t"); + int a, b; + t.Branch("a", &a); + t.Branch("b", &b); + TDataFrame tdf(t); + auto names = tdf.GetColumnNames(); + EXPECT_STREQ("a", names[0].c_str()); + EXPECT_STREQ("b", names[1].c_str()); + EXPECT_EQ(2U, names.size()); +} + +TEST(TDataFrameInterface, GetColumnNamesFromSource) +{ + std::unique_ptr<TDataSource> tds(new TTrivialDS(1)); + TDataFrame tdf(std::move(tds)); + auto names = tdf.Define("b", []() { return 1; }).GetColumnNames(); + EXPECT_STREQ("b", names[0].c_str()); + EXPECT_STREQ("col0", names[1].c_str()); + EXPECT_EQ(2U, names.size()); +}