Skip to content
9 changes: 4 additions & 5 deletions tree/treeplayer/inc/ROOT/TDFInterface.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1320,11 +1320,10 @@ private:
void DefineDSColumnHelper(std::string_view name, TLoopManager &lm)
{
assert(fDataSource != nullptr);
const auto nSlots = fProxiedPtr->GetNSlots();
auto readers = fDataSource->GetColumnReaders<T>(name, nSlots);
auto getValuePtr = [readers](unsigned int slot) { return *readers[slot]; };
using NewCol_t = TDFDetail::TCustomColumn<decltype(getValuePtr), true, true>;
lm.Book(std::make_shared<NewCol_t>(name, std::move(getValuePtr), ColumnNames_t{}, &lm));
auto readers = fDataSource->GetColumnReaders<T>(name);
auto getValue = [readers](unsigned int slot) { return **readers[slot]; };
using NewCol_t = TDFDetail::TCustomColumn<decltype(getValue), true>;
lm.Book(std::make_shared<NewCol_t>(name, std::move(getValue), ColumnNames_t{}, &lm));
lm.AddDataSourceColumn(name);
}

Expand Down
50 changes: 31 additions & 19 deletions tree/treeplayer/inc/ROOT/TDFNodes.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -369,13 +369,41 @@ public:
unsigned int GetNSlots() const { return fNSlots; }
};

template <typename F, bool PassSlotNumber = false, bool IsDataSourceColumn = false>
template <typename F, bool PassSlotNumber = false>
class TCustomColumn final : public TCustomColumnBase {

/// A type that throws if default-constructed or assigned to (which is all TCustomColumn will do with it)
struct AbortIfUsed {
template<typename T>
void operator=(const T&) {
assert(false && "This `Define`d column returns a non-assignable type. This is not supported.");
}
};

using FunParamTypes_t = typename CallableTraits<F>::arg_types;
using BranchTypes_t = typename TDFInternal::RemoveFirstParameterIf<PassSlotNumber, FunParamTypes_t>::type;
using TypeInd_t = TDFInternal::GenStaticSeq_t<BranchTypes_t::list_size>;
using ret_type = typename CallableTraits<F>::ret_type;
// We need UpdateHelper to compile even for non-assignable or non-default-constructible return types of the custom
// column expression.
// In particular the expression `fLastResults[0] = fExpression(columns...)` would not compile if the returned type
// did not have an assignment operator, and `new ret_type()` would not compile without a default constructor. So we
// switch these problematic return types with `AbortIfUsed`, which breaks on an assertion if someone ever tries to
// actually default-construct it, or assign to it.
// This workaround is required because the code-path that reads values from a data-source always triggers compilation
// of `TCustomColumn`s of each type that a node takes in input, even though a data-source is not actually present at
// runtime, and even though that type is non-assignable/non-default-constructible.
//
// The workaround can go away if/when we support data-source columns of non-assignable types (or at least when we
// avoid doing those assignments ourselves) and if we decide to drop support of non-default-constructible types.
//
// An alternative solution might be delegating calls to `DefineDataSourceColumns` to specialized functions chosen
// at TInterface construction time (only when TDataSource is present the specialized function would actually do
// something). This way the compiler would try to compile TCustomColumns for all inputs of all nodes only when
// a TDataSource is present.
using TrueRetType_t = typename CallableTraits<F>::ret_type;
using ret_type = typename std::conditional<std::is_copy_assignable<TrueRetType_t>::value &&
std::is_default_constructible<TrueRetType_t>::value,
TrueRetType_t, AbortIfUsed>::type;
// Avoid instantiating vector<bool> as `operator[]` returns temporaries in that case. Use std::deque instead.
using ValuesPerSlot_t =
typename std::conditional<std::is_same<ret_type, bool>::value, std::deque<ret_type>, std::vector<ret_type>>::type;
Expand Down Expand Up @@ -403,11 +431,7 @@ public:
fImplPtr->GetBookedColumns(), TypeInd_t());
}

void *GetValuePtr(unsigned int slot) final
{
// could be nicely made a constexpr if instead
return GetValuePtrImpl(slot, std::integral_constant<bool, IsDataSourceColumn>());
}
void *GetValuePtr(unsigned int slot) final { return static_cast<void *>(&fLastResults[slot]); }

void Update(unsigned int slot, Long64_t entry) final
{
Expand Down Expand Up @@ -441,18 +465,6 @@ public:
}

void ClearValueReaders(unsigned int slot) final { ResetTDFValueTuple(fValues[slot], TypeInd_t()); }
private:
// user-defined custom columns are stored by value, take their address
void *GetValuePtrImpl(unsigned int slot, std::false_type /*IsDataSourceColumn*/)
{
return static_cast<void *>(&fLastResults[slot]);
}

// data-source columns are stored by pointer, just cast it to void*
void *GetValuePtrImpl(unsigned int slot, std::true_type /*IsDataSourceColumn*/)
{
return static_cast<void *>(fLastResults[slot]);
}
};

class TFilterBase {
Expand Down
9 changes: 6 additions & 3 deletions tree/treeplayer/inc/ROOT/TDataSource.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ public:
virtual std::string GetTypeName(std::string_view) const = 0;
/// Called at most once per column by TDF. Return vector of pointers to pointers to column values - one per slot.
template <typename T>
std::vector<T **> GetColumnReaders(std::string_view name, unsigned int nSlots)
std::vector<T **> GetColumnReaders(std::string_view name)
{
auto typeErasedVec = GetColumnReadersImpl(name, nSlots, typeid(T));
auto typeErasedVec = GetColumnReadersImpl(name, typeid(T));
std::vector<T **> typedVec(typeErasedVec.size());
std::transform(typeErasedVec.begin(), typeErasedVec.end(), typedVec.begin(),
[](void *p) { return static_cast<T **>(p); });
Expand All @@ -43,6 +43,9 @@ public:
virtual const std::vector<std::pair<ULong64_t, ULong64_t>> &GetEntryRanges() const = 0;
/// Different threads will loop over different ranges and will pass different "slot" values.
virtual void SetEntry(ULong64_t entry, unsigned int slot) = 0;
/// Method to set the number of slots. Some implementations may rely on this
/// information for optimisation purposes.
virtual void SetNSlots(ULong64_t nSlots) = 0;
/// Convenience method called at the start of each task, before processing a range of entries.
/// DataSources can implement it if needed (does nothing by default).
/// firstEntry is the first entry of the range that the task will process.
Expand All @@ -51,7 +54,7 @@ public:
protected:
/// type-erased vector of pointers to pointers to column values - one per slot
virtual std::vector<void *>
GetColumnReadersImpl(std::string_view name, unsigned int nSlots, const std::type_info &) = 0;
GetColumnReadersImpl(std::string_view name, const std::type_info &) = 0;
};

} // ns TDF
Expand Down
2 changes: 2 additions & 0 deletions tree/treeplayer/src/TDataFrame.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -642,4 +642,6 @@ TDataFrame::TDataFrame(ULong64_t numEntries)
TDataFrame::TDataFrame(std::unique_ptr<TDataSource> ds, const ColumnNames_t &defaultBranches)
: TInterface<TDFDetail::TLoopManager>(std::make_shared<TDFDetail::TLoopManager>(std::move(ds), defaultBranches))
{
const auto nSlots = ROOT::GetImplicitMTPoolSize();
GetProxiedPtr()->GetDataSource()->SetNSlots( 0U == nSlots ? 1U : nSlots);
}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would change these two lines with fDataSource->SetNSlots(fNSlots) in TDFNodes.cxx:129 (TLoopManager's ctor) to keep TLoopManager the sole authority when it comes to TDF's number of slots.