Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README/ReleaseNotes/v612/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ large TClonesArray where each element contains another small vector container.
- Users can now configure Snapshot to use different file open modes ("RECREATE" or "UPDATE"), compression level, compression algorithm, TTree split-level and autoflush settings
- Python tutorials show the new "tuple-initialisation" feature of PyROOT (see below)
- The possibility to read from data sources was added. An interface for all data sources, TDataSource, is provided by ROOT. Two example data sources have been provided too: the TRootDS and the TTrivialDS. The former allows reading ROOT data via the new data source mechanism, while the latter is a simple generator, created for testing and didactic purposes. It is therefore now possible to interface *any* kind of dataset/data format to ROOT as long as an adaptor which implements the pure virtual methods of the TDataSource interface can be written in C++.
- Column can be aliased with the TInterface method Alias: `auto histo = mytdf.Alias("myAlias", "myColumn").Histo1D("myAlias");`
- Columns can be aliased with the `TDF::TInterface` method `Alias`: `auto histo = mytdf.Alias("myAlias", "myColumn").Histo1D("myAlias");`
- Add the `GetColumnNames` method to `TDF::TInterface`: users can now get the names of the available columns coming from trees, data sources or `Define`d columns

## Histogram Libraries

Expand Down
38 changes: 37 additions & 1 deletion tree/treeplayer/inc/ROOT/TDFInterface.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,7 @@ public:
return *reinterpret_cast<TInterface<TLoopManager> *>(newTDFPtr);
}

// clang-format off
////////////////////////////////////////////////////////////////////////////
/// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
/// \param[in] treename The name of the output TTree
Expand All @@ -572,6 +573,7 @@ public:
auto selectedColumns = ConvertRegexToColumns(columnNameRegexp);
return Snapshot(treename, filename, selectedColumns, options);
}
// clang-format on

////////////////////////////////////////////////////////////////////////////
/// \brief Save selected columns in memory
Expand Down Expand Up @@ -1325,6 +1327,39 @@ public:
fProxiedPtr->Report();
}

/////////////////////////////////////////////////////////////////////////////
/// \brief Returns the names of the available columns
/// \return The column names, collected in this order: valid custom columns
///         (skipping the internal ones), branches of the tree (if a tree is
///         present), columns of the data source (if a data source is present).
///
/// This is neither an action nor a transformation: it is a plain utility that
/// gathers the column names known to this TDataFrame node. Only the custom
/// columns are filtered through TDFInternal::IsInternalColumn; branch names and
/// data source column names are appended as they are.
ColumnNames_t GetColumnNames()
{
   ColumnNames_t columnNames;

   // Custom columns come first; internal ones are not exposed to the user.
   for (std::string_view customCol : fValidCustomColumns) {
      if (TDFInternal::IsInternalColumn(customCol))
         continue;
      columnNames.emplace_back(customCol);
   }

   // Then the top-level branches of the tree, when there is one.
   auto loopManager = GetDataFrameChecked();
   auto inputTree = loopManager->GetTree();
   if (inputTree) {
      const auto branchList = inputTree->GetListOfBranches();
      for (auto br : *branchList) {
         columnNames.emplace_back(br->GetName());
      }
   }

   // Finally the columns offered by the data source, when there is one.
   if (fDataSource) {
      auto &sourceColumns = fDataSource->GetColumnNames();
      columnNames.insert(columnNames.end(), sourceColumns.begin(), sourceColumns.end());
   }

   return columnNames;
}

private:
ColumnNames_t ConvertRegexToColumns(std::string_view columnNameRegexp)
{
Expand All @@ -1348,7 +1383,8 @@ private:
TRegexp regexp(theRegex);
int dummy;
for (auto &&branchName : customColumns) {
if ((isEmptyRegex || -1 != regexp.Index(branchName.c_str(), &dummy)) && !TDFInternal::IsInternalColumn(branchName)) {
if ((isEmptyRegex || -1 != regexp.Index(branchName.c_str(), &dummy)) &&
!TDFInternal::IsInternalColumn(branchName)) {
selectedColumns.emplace_back(branchName);
}
}
Expand Down
35 changes: 35 additions & 0 deletions tree/treeplayer/test/dataframe/dataframe_interface.cxx
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
#include "ROOT/TDataFrame.hxx"
#include "ROOT/TTrivialDS.hxx"
#include "TMemFile.h"
#include "TTree.h"

#include "gtest/gtest.h"

using namespace ROOT::Experimental;
using namespace ROOT::Experimental::TDF;

TEST(TDataFrameInterface, CreateFromNullTDirectory)
{
Expand Down Expand Up @@ -89,3 +91,36 @@ TEST(TDataFrameInterface, CheckAliasesPerChain)
}
EXPECT_EQ(0, ret) << "No exception thrown when trying to alias a non-existing column.";
}

// GetColumnNames on a TDataFrame built from scratch: the two user-defined
// columns are expected back in definition order, while the test expects the
// third one ("__TDF_Dummy") to be filtered out of the result.
TEST(TDataFrameInterface, GetColumnNamesFromScratch)
{
   TDataFrame f(1);
   auto dummyGen = []() { return 1; };
   auto names = f.Define("a", dummyGen).Define("b", dummyGen).Define("__TDF_Dummy", dummyGen).GetColumnNames();
   // Fatal size check first: without it, the element accesses below would read
   // out of bounds whenever fewer than two names are returned.
   ASSERT_EQ(2U, names.size());
   EXPECT_STREQ("a", names[0].c_str());
   EXPECT_STREQ("b", names[1].c_str());
}

// GetColumnNames on a TDataFrame built on top of a TTree: the names of the two
// branches are expected back in the order in which the branches were created.
TEST(TDataFrameInterface, GetColumnNamesFromTree)
{
   TTree t("t", "t");
   // The tree is never filled here; the variables only provide branch addresses.
   int a = 0;
   int b = 0;
   t.Branch("a", &a);
   t.Branch("b", &b);
   TDataFrame tdf(t);
   auto names = tdf.GetColumnNames();
   // Fatal size check first: without it, the element accesses below would read
   // out of bounds whenever fewer than two names are returned.
   ASSERT_EQ(2U, names.size());
   EXPECT_STREQ("a", names[0].c_str());
   EXPECT_STREQ("b", names[1].c_str());
}

// GetColumnNames on a TDataFrame reading from a data source: the user-defined
// column "b" is expected first, followed by the column named "col0" that the
// test expects from the TTrivialDS.
TEST(TDataFrameInterface, GetColumnNamesFromSource)
{
   std::unique_ptr<TDataSource> tds(new TTrivialDS(1));
   TDataFrame tdf(std::move(tds));
   auto names = tdf.Define("b", []() { return 1; }).GetColumnNames();
   // Fatal size check first: without it, the element accesses below would read
   // out of bounds whenever fewer than two names are returned.
   ASSERT_EQ(2U, names.size());
   EXPECT_STREQ("b", names[0].c_str());
   EXPECT_STREQ("col0", names[1].c_str());
}