Skip to content

Commit 3c0166d

Browse files
committed
[TDF] Define DataSource columns on demand in CreateActions
Most actions (the ones that are handled by CreateActions) can now detect when a column they act upon comes from a datasource and has not yet been defined. The appropriate `Define`ition is triggered accordingly.
1 parent cde9e8c commit 3c0166d

File tree

4 files changed

+78
-14
lines changed

4 files changed

+78
-14
lines changed

tree/treeplayer/inc/ROOT/TDFInterface.hxx

Lines changed: 66 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,13 +1149,15 @@ private:
11491149
typename std::enable_if<!TDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
11501150
TResultProxy<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r)
11511151
{
1152-
auto loopManager = GetDataFrameChecked();
1153-
const auto nColumns = sizeof...(BranchTypes);
1154-
const auto validColumnNames = GetValidatedColumnNames(*loopManager, nColumns, columns);
1152+
auto lm = GetDataFrameChecked();
1153+
constexpr auto nColumns = sizeof...(BranchTypes);
1154+
const auto colsAndUndefCols = CheckColumnNames(columns, nColumns, *lm);
1155+
if (fDataSource)
1156+
DefineDataSourceColumns<BranchTypes...>(colsAndUndefCols, *lm, TDFInternal::GenStaticSeq_t<nColumns>());
1157+
const auto &theColumnNames = colsAndUndefCols.first;
11551158
const auto nSlots = fProxiedPtr->GetNSlots();
1156-
TDFInternal::BuildAndBook<BranchTypes...>(validColumnNames, r, nSlots, *loopManager, *fProxiedPtr,
1157-
(ActionType *)nullptr);
1158-
return MakeResultProxy(r, loopManager);
1159+
TDFInternal::BuildAndBook<BranchTypes...>(theColumnNames, r, nSlots, *lm, *fProxiedPtr, (ActionType *)nullptr);
1160+
return MakeResultProxy(r, lm);
11591161
}
11601162

11611163
// User did not specify type, do type inference
@@ -1238,12 +1240,18 @@ private:
12381240
return snapshotTDF;
12391241
}
12401242

1241-
ColumnNames_t GetValidatedColumnNames(TLoopManager &lm, const unsigned int nColumns,
1242-
const ColumnNames_t &userColumns)
1243+
/// Given the desired number of columns and the user-provided list of columns:
1244+
/// * fall back to using the first nColumns default columns if needed (or throw if nColumns > nDefaultColumns)
1245+
/// * check that selected column names refer to valid branches, custom columns or datasource columns (throw if not)
1246+
/// Return the list of selected column names and a bitmask indicating the columns that must be defined via datasource
1247+
std::pair<ColumnNames_t, std::vector<bool>>
1248+
CheckColumnNames(const ColumnNames_t &userColumns, const unsigned int nColumns, TLoopManager &lm)
12431249
{
12441250
const auto &defaultColumns = lm.GetDefaultColumnNames();
1245-
const auto trueColumns = TDFInternal::SelectColumns(nColumns, userColumns, defaultColumns);
1246-
const auto unknownColumns = TDFInternal::FindUnknownColumns(trueColumns, lm.GetTree(), fValidCustomColumns);
1251+
const auto selectedColumns = TDFInternal::SelectColumns(nColumns, userColumns, defaultColumns);
1252+
const auto unknownColumns =
1253+
TDFInternal::FindUnknownColumns(selectedColumns, lm.GetTree(), fValidCustomColumns,
1254+
fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
12471255

12481256
if (!unknownColumns.empty()) {
12491257
// throw
@@ -1256,7 +1264,54 @@ private:
12561264
throw std::runtime_error("Unknown column" + unknowns.str());
12571265
}
12581266

1259-
return trueColumns;
1267+
const auto mustBeDefined = FindUndefinedDSColumns(selectedColumns, lm.GetDefinedDataSourceColumns());
1268+
return std::make_pair(selectedColumns, mustBeDefined);
1269+
}
1270+
1271+
//TODO remove usages of this everywhere
1272+
ColumnNames_t
1273+
GetValidatedColumnNames(TLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &userColumns) {
1274+
return CheckColumnNames(userColumns, nColumns, lm).first;
1275+
}
1276+
1277+
/// Return a bitset each element of which indicates whether the corresponding element in `selectedColumns` is the
1278+
/// name of a column that must be defined via datasource. All elements of the returned vector are false if no
1279+
/// data-source is present.
1280+
std::vector<bool> FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedDSCols)
1281+
{
1282+
const auto nColumns = requestedCols.size();
1283+
std::vector<bool> mustBeDefined(nColumns, false);
1284+
if (fDataSource) {
1285+
for (auto i = 0u; i < nColumns; ++i)
1286+
mustBeDefined[i] =
1287+
std::find(definedDSCols.begin(), definedDSCols.end(), requestedCols[i]) == definedDSCols.end();
1288+
}
1289+
return mustBeDefined;
1290+
}
1291+
1292+
template <typename... ColumnTypes, int... S>
1293+
void DefineDataSourceColumns(const std::pair<std::vector<std::string>, std::vector<bool>> &colsAndUndefCols,
1294+
TLoopManager &lm, TDFInternal::StaticSeq<S...> /*dummy*/)
1295+
{
1296+
assert(fDataSource != nullptr);
1297+
const auto &columns = colsAndUndefCols.first;
1298+
const auto &mustBeDefined = colsAndUndefCols.second;
1299+
if (std::none_of(mustBeDefined.begin(), mustBeDefined.end(), [](bool b) { return b; })) {
1300+
// no need to define any column
1301+
return;
1302+
} else {
1303+
// hack to expand a template parameter pack without c++17 fold expressions.
1304+
std::initializer_list<int> expander{
1305+
(mustBeDefined[S] ? DefineDSColumnHelper<ColumnTypes>(columns[S], lm) : /*no-op*/((void)0), 0)...};
1306+
}
1307+
}
1308+
1309+
template<typename T>
1310+
void DefineDSColumnHelper(std::string_view name, TLoopManager &lm) {
1311+
const auto nSlots = fProxiedPtr->GetNSlots();
1312+
auto readers = fDataSource->GetColumnReaders<T>(name, nSlots);
1313+
DefineSlot(name, [readers](unsigned int slot) { return **readers[slot]; });
1314+
lm.AddDataSourceColumn(name);
12601315
}
12611316

12621317
protected:

tree/treeplayer/inc/ROOT/TDFNodes.hxx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ class TLoopManager : public std::enable_shared_from_this<TLoopManager> {
102102
unsigned int fNStopsReceived{0}; ///< Number of times that a children node signaled to stop processing entries.
103103
const ELoopType fLoopType; ///< The kind of event loop that is going to be run (e.g. on ROOT files, on no files)
104104
std::string fToJit; ///< string containing all `BuildAndBook` actions that should be jitted before running
105-
std::unique_ptr<TDataSource> fDataSource; ///< Owning pointer to a data-source object. Null if no data-source.
105+
std::unique_ptr<TDataSource> fDataSource; ///< Owning pointer to a data-source object. Null if no data-source
106+
ColumnNames_t fDefinedDataSourceColumns; ///< List of data-source columns that have been `Define`d so far
106107

107108
void RunEmptySourceMT();
108109
void RunEmptySource();
@@ -149,6 +150,8 @@ public:
149150
void IncrChildrenCount() { ++fNChildren; }
150151
void StopProcessing() { ++fNStopsReceived; }
151152
void Jit(const std::string &s) { fToJit.append(s); }
153+
/// Names of the data-source columns that have been registered via AddDataSourceColumn so far.
const ColumnNames_t &GetDefinedDataSourceColumns() const
{
   return fDefinedDataSourceColumns;
}
154+
/// Append `name` to the list of data-source columns that have been `Define`d.
void AddDataSourceColumn(std::string_view name)
{
   fDefinedDataSourceColumns.emplace_back(name);
}
152155
};
153156
} // end ns TDF
154157
} // end ns Detail

tree/treeplayer/inc/ROOT/TDFUtils.hxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,8 @@ void CheckSnapshot(unsigned int nTemplateParams, unsigned int nColumnNames);
210210
const ColumnNames_t SelectColumns(unsigned int nArgs, const ColumnNames_t &bl, const ColumnNames_t &defBl);
211211

212212
/// Check whether column names refer to a valid branch of a TTree, have been `Define`d, or are provided by a
/// data-source. Return invalid column names.
213-
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, TTree *tree, const ColumnNames_t &definedCols);
213+
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, TTree *tree, const ColumnNames_t &definedCols,
214+
const ColumnNames_t &dataSourceColumns);
214215

215216
namespace ActionTypes {
216217
struct Histo1D {

tree/treeplayer/src/TDFUtils.cxx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,8 @@ const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames
185185
}
186186
}
187187

188-
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, TTree *tree, const ColumnNames_t &definedCols)
188+
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, TTree *tree, const ColumnNames_t &definedCols,
189+
const ColumnNames_t &dataSourceColumns)
189190
{
190191
ColumnNames_t unknownColumns;
191192
for (auto &column : requiredCols) {
@@ -195,6 +196,10 @@ ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, TTree *tree,
195196
const auto isCustomColumn = std::find(definedCols.begin(), definedCols.end(), column) != definedCols.end();
196197
if (isCustomColumn)
197198
continue;
199+
const auto isDataSourceColumn =
200+
std::find(dataSourceColumns.begin(), dataSourceColumns.end(), column) != dataSourceColumns.end();
201+
if (isDataSourceColumn)
202+
continue;
198203
unknownColumns.emplace_back(column);
199204
}
200205
return unknownColumns;

0 commit comments

Comments
 (0)