diff --git a/README/ReleaseNotes/v626/index.md b/README/ReleaseNotes/v626/index.md index e587ddf6cf9e2..971bf8e78cf00 100644 --- a/README/ReleaseNotes/v626/index.md +++ b/README/ReleaseNotes/v626/index.md @@ -96,6 +96,48 @@ RooFit now contains two RDataFrame action helpers, `RooDataSetHelper` and `RooDa ``` For more details, consult the tutorial [rf408_RDataFrameToRooFit](https://root.cern/doc/v626/rf408__RDataFrameToRooFit_8C.html). +### Storing global observables in RooFit datasets + +RooFit groups model variables into *observables* and *parameters*, depending on if their values are stored in the dataset. +For fits with parameter constraints, there is a third kind of variables, called *global observables*. +These represent the results of auxiliary measurements that constrain the nuisance parameters. +In the RooFit implementation, a likelihood is generally the sum of two terms: + * the likelihood of the data given the parameters, where the normalization set is the set of observables (implemented by `RooNLLVar`) + * the constraint term, where the normalization set is the set of *global observables* (implemented by `RooConstraintSum`) + +Before this release, the global observable values were always taken from the model/pdf. +With this release, a mechanism is added to store a snapshot of global observables in any `RooDataSet` or `RooDataHist`. +For toy studies where the global observables assume a different values for each toy, the bookkeeping of the set of global observables and in particular their values is much easier with this change. + +Usage example for a model with global observables `g1` and `g2`: +```C++ +auto data = model.generate(x, 1000); // data has only the single observables x +data->setGlobalObservables(g1, g2); // now, data also stores a snapshot of g1 and g2 + +// If you fit the model to the data, the global observables and their values +// are taken from the dataset: +model.fitTo(*data); + +// You can still define the set of global observables yourself, but the values +// will be takes from the dataset if available: +model.fitTo(*data, GlobalObservables(g1, g2)); + +// To force `fitTo` to take the global observable values from the model even +// though they are in the dataset, you can use the new `GlobalObservablesSource` +// command argument: +model.fitTo(*data, GlobalObservables(g1, g2), GlobalObservablesSource("model")); +// The only other allowed value for `GlobalObservablesSource` is "data", which +// corresponds to the new default behavior explained above. +``` + +In case you create a RooFit dataset directly by calling its constructor, you can also pass the global observables in a command argument instead of calling `setGlobalObservables()` later: +```C++ +RooDataSet data{"dataset", "dataset", x, RooFit::GlobalObservables(g1, g2)}; +``` + +To access the set of global observables stored in a `RooAbsData`, call `RooAbsData::getGlobalObservables()`. +It returns a `nullptr` if no global observable snapshots are stored in the dataset. + ### Changes in `RooAbsPdf::fitTo` behaviour for multi-range fits The `RooAbsPdf::fitTo` and `RooAbsPdf::createNLL` functions accept a command argument to specify the fit range. @@ -106,7 +148,7 @@ From now on, the likelihoods are normalized by the sum of integrals in each rang ### Deprecation of the `RooMinuit` class -The `RooMinuit` class was the old interface between RooFit and minuit. With ROOT version 5.24, a the more general `RooMinimizer` adapter was introduced, which became the default with ROOT 6.08. +The `RooMinuit` class was the old interface between RooFit and minuit. With ROOT version 5.24, a the more general `RooMinimizer` adapter was introduced, which became the default with ROOT 6.08. Before 6.26, it was possible to still use the `RooMinuit` by passing the `Minimizer("OldMinuit", "minimizer")` command argument to `RooAbsPdf::fitTo()`. This option is now removed. diff --git a/roofit/roofitcore/inc/RooAbsData.h b/roofit/roofitcore/inc/RooAbsData.h index 6657583f9c1ad..e8e9c9725ab03 100644 --- a/roofit/roofitcore/inc/RooAbsData.h +++ b/roofit/roofitcore/inc/RooAbsData.h @@ -303,6 +303,13 @@ class RooAbsData : public TNamed, public RooPrintable { static StorageType getDefaultStorageType(); + /// Returns snapshot of global observables stored in this data. + /// \return Pointer to a RooArgSet with the snapshot of global observables + /// stored in the data. Can be `nullptr` if no global observales are + /// stored. + RooArgSet const* getGlobalObservables() const { return _globalObservables.get(); } + void setGlobalObservables(RooArgSet const& globalObservables); + protected: static StorageType defaultStorageType ; @@ -350,8 +357,12 @@ class RooAbsData : public TNamed, public RooPrintable { std::map _ownedComponents ; // Owned external components + std::unique_ptr _globalObservables; // Snapshot of global observables + private: - ClassDef(RooAbsData, 5) // Abstract data collection + void copyGlobalObservables(const RooAbsData& other); + + ClassDef(RooAbsData, 6) // Abstract data collection }; #endif diff --git a/roofit/roofitcore/inc/RooAbsPdf.h b/roofit/roofitcore/inc/RooAbsPdf.h index 4ad0e2a7ab84c..bdfb332bed70e 100644 --- a/roofit/roofitcore/inc/RooAbsPdf.h +++ b/roofit/roofitcore/inc/RooAbsPdf.h @@ -159,6 +159,32 @@ class RooAbsPdf : public RooAbsReal { const RooCmdArg& arg6=RooCmdArg::none(), const RooCmdArg& arg7=RooCmdArg::none(), const RooCmdArg& arg8=RooCmdArg::none()) ; virtual RooFitResult* fitTo(RooAbsData& data, const RooLinkedList& cmdList) ; + /// Configuration struct for RooAbsPdf::minimizeNLL with all the default + //values that also should be taked as the default values for + //RooAbsPdf::fitTo. + struct MinimizerConfig { + double recoverFromNaN = 10.; + std::string fitOpt = ""; + int optConst = 2; + int verbose = 0; + int doSave = 0; + int doTimer = 0; + int printLevel = 1; + int strat = 1; + int initHesse = 0; + int hesse = 1; + int minos = 0; + int numee = 10; + int doEEWall = 1; + int doWarn = 1; + int doSumW2 = -1; + int doAsymptotic = -1; + const RooArgSet* minosSet = nullptr; + std::string minType = "Minuit"; + std::string minAlg = "minuit"; + }; + std::unique_ptr minimizeNLL(RooAbsReal & nll, RooAbsData const& data, MinimizerConfig const& cfg); + virtual RooAbsReal* createNLL(RooAbsData& data, const RooLinkedList& cmdList) ; virtual RooAbsReal* createNLL(RooAbsData& data, const RooCmdArg& arg1=RooCmdArg::none(), const RooCmdArg& arg2=RooCmdArg::none(), const RooCmdArg& arg3=RooCmdArg::none(), const RooCmdArg& arg4=RooCmdArg::none(), const RooCmdArg& arg5=RooCmdArg::none(), @@ -359,8 +385,8 @@ class RooAbsPdf : public RooAbsReal { static TString _normRangeOverride ; private: - int calculateAsymptoticCorrectedCovMatrix(RooMinimizer& minimizer, RooAbsData const& data); - int calculateSumW2CorrectedCovMatrix(RooMinimizer& minimizer, RooAbsReal const& nll) const; + int calcAsymptoticCorrectedCovariance(RooMinimizer& minimizer, RooAbsData const& data); + int calcSumW2CorrectedCovariance(RooMinimizer& minimizer, RooAbsReal const& nll) const; ClassDef(RooAbsPdf,5) // Abstract PDF with normalization support }; diff --git a/roofit/roofitcore/inc/RooAbsTestStatistic.h b/roofit/roofitcore/inc/RooAbsTestStatistic.h index 5274220bbb121..bc9407ff2b211 100644 --- a/roofit/roofitcore/inc/RooAbsTestStatistic.h +++ b/roofit/roofitcore/inc/RooAbsTestStatistic.h @@ -51,6 +51,7 @@ class RooAbsTestStatistic : public RooAbsReal { bool cloneInputData = true; double integrateOverBinsPrecision = -1.; bool binnedL = false; + bool takeGlobalObservablesFromData = false; }; // Constructors, assignment etc @@ -155,10 +156,11 @@ class RooAbsTestStatistic : public RooAbsReal { RooFit::MPSplit _mpinterl = RooFit::BulkPartition; // Use interleaving strategy rather than N-wise split for partioning of dataset for multiprocessor-split Bool_t _doOffset = false; // Apply interval value offset to control numeric precision? + const bool _takeGlobalObservablesFromData = false; // If the global observable values are taken from data mutable ROOT::Math::KahanSum _offset = 0.0; //! Offset as KahanSum to avoid loss of precision mutable Double_t _evalCarry = 0.0; //! carry of Kahan sum in evaluatePartition - ClassDef(RooAbsTestStatistic,2) // Abstract base class for real-valued test statistics + ClassDef(RooAbsTestStatistic,3) // Abstract base class for real-valued test statistics }; diff --git a/roofit/roofitcore/inc/RooConstraintSum.h b/roofit/roofitcore/inc/RooConstraintSum.h index d721e42761cfc..f0942f96b2c0d 100644 --- a/roofit/roofitcore/inc/RooConstraintSum.h +++ b/roofit/roofitcore/inc/RooConstraintSum.h @@ -28,31 +28,39 @@ class RooConstraintSum : public RooAbsReal { public: RooConstraintSum() {} - RooConstraintSum(const char *name, const char *title, const RooArgSet& constraintSet, const RooArgSet& paramSet) ; + RooConstraintSum(const char *name, const char *title, const RooArgSet& constraintSet, const RooArgSet& paramSet, bool takeGlobalObservablesFromData=false) ; RooConstraintSum(const RooConstraintSum& other, const char* name = 0); - virtual TObject* clone(const char* newname) const { return new RooConstraintSum(*this, newname); } + virtual TObject* clone(const char* newname) const override { return new RooConstraintSum(*this, newname); } const RooArgList& list() { return _set1 ; } static std::unique_ptr createConstraintTerm( std::string const& name, RooAbsPdf const& pdf, - RooArgSet const& observables, + RooAbsData const& data, RooArgSet const* constrainedParameters, RooArgSet const* externalConstraints, RooArgSet const* globalObservables, const char* globalObservablesTag, + bool takeGlobalObservablesFromData, RooWorkspace * workspace = nullptr); + bool setData(RooAbsData const& data, bool cloneData=true); + /// \copydoc setData(RooAbsData const&, bool) + bool setData(RooAbsData& data, bool cloneData=true) override { + return setData(static_cast(data), cloneData); + } + protected: RooListProxy _set1 ; // Set of constraint terms RooSetProxy _paramSet ; // Set of parameters to which constraints apply + const bool _takeGlobalObservablesFromData = false; // If the global observable values are taken from data - Double_t evaluate() const; + Double_t evaluate() const override; - ClassDef(RooConstraintSum,2) // sum of -log of set of RooAbsPdf representing parameter constraints + ClassDefOverride(RooConstraintSum,3) // sum of -log of set of RooAbsPdf representing parameter constraints }; #endif diff --git a/roofit/roofitcore/inc/RooGlobalFunc.h b/roofit/roofitcore/inc/RooGlobalFunc.h index e22f5341bfb3d..0e8b2a0372bac 100644 --- a/roofit/roofitcore/inc/RooGlobalFunc.h +++ b/roofit/roofitcore/inc/RooGlobalFunc.h @@ -239,10 +239,14 @@ RooCmdArg SplitRange(Bool_t flag=kTRUE) ; RooCmdArg SumCoefRange(const char* rangeName) ; RooCmdArg Constrain(const RooArgSet& params) ; RooCmdArg Constrain(RooArgSet && params) ; -RooCmdArg GlobalObservables(const RooArgSet& globs) ; -RooCmdArg GlobalObservables(RooArgSet && globs) ; + +template +RooCmdArg GlobalObservables(Args_t &&... argsOrArgSet) { + return RooCmdArg("GlobalObservables",0,0,0,0,0,0,0,0,0,0, + &RooCmdArg::take(RooArgSet{std::forward(argsOrArgSet)...})); +} +RooCmdArg GlobalObservablesSource(const char* sourceName); RooCmdArg GlobalObservablesTag(const char* tagName) ; -//RooCmdArg Constrained() ; RooCmdArg ExternalConstraints(const RooArgSet& constraintPdfs) ; RooCmdArg ExternalConstraints(RooArgSet && constraintPdfs) ; RooCmdArg PrintEvalErrors(Int_t numErrors) ; diff --git a/roofit/roofitcore/src/RooAbsData.cxx b/roofit/roofitcore/src/RooAbsData.cxx index 0651026751ab6..be00bfb9bb866 100644 --- a/roofit/roofitcore/src/RooAbsData.cxx +++ b/roofit/roofitcore/src/RooAbsData.cxx @@ -23,6 +23,58 @@ RooAbsData is the common abstract base class for binned and unbinned datasets. The abstract interface defines plotting and tabulating entry points for its contents and provides an iterator over its elements (bins for binned data sets, data points for unbinned datasets). + +### Storing global observables in RooFit datasets + +RooFit groups model variables into *observables* and *parameters*, depending on +if their values are stored in the dataset. For fits with parameter +constraints, there is a third kind of variables, called *global observables*. +These represent the results of auxiliary measurements that constrain the +nuisance parameters. In the RooFit implementation, a likelihood is generally +the sum of two terms: +- the likelihood of the data given the parameters, where the normalization set + is the set of observables (implemented by RooNLLVar) +- the constraint term, where the normalization set is the set of *global +observables* (implemented by RooConstraintSum) + +Before this release, the global observable values were always taken from the +model/pdf. With this release, a mechanism is added to store a snapshot of +global observables in any RooDataSet or RooDataHist. For toy studies where the +global observables assume a different values for each toy, the bookkeeping of +the set of global observables and in particular their values is much easier +with this change. + +Usage example for a model with global observables `g1` and `g2`: +``` +auto data = model.generate(x, 1000); // data has only the single observables x +data->setGlobalObservables(g1, g2); // now, data also stores a snapshot of g1 and g2 + +// If you fit the model to the data, the global observables and their values +// are taken from the dataset: +model.fitTo(*data); + +// You can still define the set of global observables yourself, but the values +// will be takes from the dataset if available: +model.fitTo(*data, GlobalObservables(g1, g2)); + +// To force `fitTo` to take the global observable values from the model even +// though they are in the dataset, you can use the new `GlobalObservablesSource` +// command argument: +model.fitTo(*data, GlobalObservables(g1, g2), GlobalObservablesSource("model")); +// The only other allowed value for `GlobalObservablesSource` is "data", which +// corresponds to the new default behavior explained above. +``` + +In case you create a RooFit dataset directly by calling its constructor, you +can also pass the global observables in a command argument instead of calling +RooAbsData::setGlobalObservables() later: +``` +RooDataSet data{"dataset", "dataset", x, RooFit::GlobalObservables(g1, g2)}; +``` + +To access the set of global observables stored in a RooAbsData, call +RooAbsData::getGlobalObservables(). It returns a `nullptr` if no global +observable snapshots are stored in the dataset. **/ #include "RooAbsData.h" @@ -206,6 +258,8 @@ RooAbsData::RooAbsData(const RooAbsData& other, const char* newname) : storageType = other.storageType; } + copyGlobalObservables(other); + RooTrace::create(this) ; } @@ -245,9 +299,23 @@ RooAbsData& RooAbsData::operator=(const RooAbsData& other) { storageType = other.storageType; } + copyGlobalObservables(other); + return *this; } + +void RooAbsData::copyGlobalObservables(const RooAbsData& other) { + if (other._globalObservables) { + if(_globalObservables == nullptr) _globalObservables = std::make_unique(); + else _globalObservables->clear(); + other._globalObservables->snapshot(*_globalObservables); + } else { + _globalObservables.reset(nullptr); + } +} + + //////////////////////////////////////////////////////////////////////////////// /// Destructor @@ -397,8 +465,8 @@ RooAbsData* RooAbsData::reduce(const RooCmdArg& arg1,const RooCmdArg& arg2,const // Process & check varargs pc.process(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8) ; - if (!pc.ok(kTRUE)) { - return 0 ; + if (!pc.ok(true)) { + return nullptr; } // Extract values from named arguments @@ -442,15 +510,12 @@ RooAbsData* RooAbsData::reduce(const RooCmdArg& arg1,const RooCmdArg& arg2,const } - if (!ret) return 0 ; + if (!ret) return nullptr; - if (name) { - ret->SetName(name) ; - } - if (title) { - ret->SetTitle(title) ; - } + if (name) ret->SetName(name) ; + if (title) ret->SetTitle(title) ; + ret->copyGlobalObservables(*this); return ret ; } @@ -463,7 +528,9 @@ RooAbsData* RooAbsData::reduce(const RooCmdArg& arg1,const RooCmdArg& arg2,const RooAbsData* RooAbsData::reduce(const char* cut) { RooFormulaVar cutVar(cut,cut,*get()) ; - return reduceEng(*get(),&cutVar,0,0,std::numeric_limits::max(),kFALSE) ; + RooAbsData* ret = reduceEng(*get(),&cutVar,0,0,std::numeric_limits::max(),kFALSE) ; + ret->copyGlobalObservables(*this); + return ret; } //////////////////////////////////////////////////////////////////////////////// @@ -473,7 +540,9 @@ RooAbsData* RooAbsData::reduce(const char* cut) RooAbsData* RooAbsData::reduce(const RooFormulaVar& cutVar) { - return reduceEng(*get(),&cutVar,0,0,std::numeric_limits::max(),kFALSE) ; + RooAbsData* ret = reduceEng(*get(),&cutVar,0,0,std::numeric_limits::max(),kFALSE) ; + ret->copyGlobalObservables(*this); + return ret; } //////////////////////////////////////////////////////////////////////////////// @@ -496,11 +565,15 @@ RooAbsData* RooAbsData::reduce(const RooArgSet& varSubset, const char* cut) } } + RooAbsData* ret = nullptr; if (cut && strlen(cut)>0) { RooFormulaVar cutVar(cut, cut, *get(), false); - return reduceEng(varSubset2,&cutVar,0,0,std::numeric_limits::max(),kFALSE) ; + ret = reduceEng(varSubset2,&cutVar,0,0,std::numeric_limits::max(),false); + } else { + ret = reduceEng(varSubset2,0,0,0,std::numeric_limits::max(),false); } - return reduceEng(varSubset2,0,0,0,std::numeric_limits::max(),kFALSE) ; + ret->copyGlobalObservables(*this); + return ret; } //////////////////////////////////////////////////////////////////////////////// @@ -514,18 +587,17 @@ RooAbsData* RooAbsData::reduce(const RooArgSet& varSubset, const RooFormulaVar& { // Make sure varSubset doesn't contain any variable not in this dataset RooArgSet varSubset2(varSubset) ; - TIterator* iter = varSubset.createIterator() ; - RooAbsArg* arg ; - while((arg=(RooAbsArg*)iter->Next())) { + for(RooAbsArg * arg : varSubset) { if (!_vars.find(arg->GetName())) { coutW(InputArguments) << "RooAbsData::reduce(" << GetName() << ") WARNING: variable " << arg->GetName() << " not in dataset, ignored" << endl ; varSubset2.remove(*arg) ; } } - delete iter ; - return reduceEng(varSubset2,&cutVar,0,0,std::numeric_limits::max(),kFALSE) ; + RooAbsData* ret = reduceEng(varSubset2,&cutVar,0,0,std::numeric_limits::max(),kFALSE) ; + ret->copyGlobalObservables(*this); + return ret; } @@ -2357,3 +2429,20 @@ void RooAbsData::RecursiveRemove(TObject *obj) } } } + +//////////////////////////////////////////////////////////////////////////////// +/// Sets the global observables stored in this data. A snapshot of the +/// observables will be saved. +/// \param[in] globalObservables The set of global observables to take a snapshot of. + +void RooAbsData::setGlobalObservables(RooArgSet const& globalObservables) { + if(_globalObservables == nullptr) _globalObservables = std::make_unique(); + else _globalObservables->clear(); + globalObservables.snapshot(*_globalObservables); + for(auto * arg : *_globalObservables) { + arg->setAttribute("global",true); + // Global observables are also always constant in fits + if(auto lval = dynamic_cast(arg)) lval->setConstant(true); + if(auto lval = dynamic_cast(arg)) lval->setConstant(true); + } +} diff --git a/roofit/roofitcore/src/RooAbsOptTestStatistic.cxx b/roofit/roofitcore/src/RooAbsOptTestStatistic.cxx index 42ae9f78af5c2..3489723fe306b 100644 --- a/roofit/roofitcore/src/RooAbsOptTestStatistic.cxx +++ b/roofit/roofitcore/src/RooAbsOptTestStatistic.cxx @@ -410,6 +410,13 @@ void RooAbsOptTestStatistic::initSlave(RooAbsReal& real, RooAbsData& indata, con optimizeCaching() ; + // It would be unusual if the global observables are used in the likelihood + // outside of the constraint terms, but if they are we have to be consistent + // and also redirect them to the snapshots in the dataset if appropriate. + if(_takeGlobalObservablesFromData && _data->getGlobalObservables()) { + recursiveRedirectServers(*_data->getGlobalObservables()) ; + } + } @@ -716,10 +723,15 @@ Bool_t RooAbsOptTestStatistic::setDataSlave(RooAbsData& indata, Bool_t cloneData //indata.Print("v") ; - // Delete previous dataset now, if it was owned + // If the current dataset is owned, transfer the ownership to unique pointer + // that will get out of scope at the end of this function. We can't delete it + // right now, because there might be global observables in the model that + // first need to be redirected to the new dataset with a later call to + // RooAbsArg::recursiveRedirectServers. + std::unique_ptr oldOwnedData; if (_ownData) { - delete _dataClone ; - _dataClone = 0 ; + oldOwnedData.reset(_dataClone); + _dataClone = nullptr ; } if (!cloneData && _rangeName.size()>0) { @@ -760,7 +772,12 @@ Bool_t RooAbsOptTestStatistic::setDataSlave(RooAbsData& indata, Bool_t cloneData setValueDirty() ; -// cout << "RAOTS::setDataSlave(" << this << ") END" << endl ; + // It would be unusual if the global observables are used in the likelihood + // outside of the constraint terms, but if they are we have to be consistent + // and also redirect them to the snapshots in the dataset if appropriate. + if(_takeGlobalObservablesFromData && _data->getGlobalObservables()) { + recursiveRedirectServers(*_data->getGlobalObservables()) ; + } return kTRUE ; } diff --git a/roofit/roofitcore/src/RooAbsPdf.cxx b/roofit/roofitcore/src/RooAbsPdf.cxx index 28025304e572c..f8ddc456af029 100644 --- a/roofit/roofitcore/src/RooAbsPdf.cxx +++ b/roofit/roofitcore/src/RooAbsPdf.cxx @@ -891,6 +891,16 @@ Double_t RooAbsPdf::extendedTerm(Double_t observed, const RooArgSet* nset) const /// `ExternalConstraints(const RooArgSet& )` Include given external constraints to likelihood by multiplying them with the original likelihood. /// `GlobalObservables(const RooArgSet&)` Define the set of normalization observables to be used for the constraint terms. /// If none are specified the constrained parameters are used. +/// `GlobalObservablesSource(const char* sourceName)` Which source to prioritize for global observable values. +/// Can be either: +/// - `data`: to take the values from the dataset, +/// falling back to the pdf value if a given global observable is not available. +/// If no `GlobalObservables` or `GlobalObservablesTag` command argument is given, the set +/// of global observables will be automatically defined to be the set stored in the data. +/// - `model`: to take all values from the pdf and completely ignore the set of global observables stored in the data +/// (not even using it to automatically define the set of global observables +/// if the `GlobalObservables` or `GlobalObservablesTag` command arguments are not given). +/// The default option is `data`. /// `GlobalObservablesTag(const char* tagName)` Define the set of normalization observables to be used for the constraint terms by /// a string attribute associated with pdf observables that match the given tagName. /// `Verbose(Bool_t flag)` Controls RooFit informational messages in likelihood construction @@ -984,6 +994,7 @@ RooAbsReal* RooAbsPdf::createNLL(RooAbsData& data, const RooLinkedList& cmdList) pc.defineString("rangeName","RangeWithName",0,"",kTRUE) ; pc.defineString("addCoefRange","SumCoefRange",0,"") ; pc.defineString("globstag","GlobalObservablesTag",0,"") ; + pc.defineString("globssource","GlobalObservablesSource",0,"data") ; pc.defineDouble("rangeLo","Range",0,-999.) ; pc.defineDouble("rangeHi","Range",1,-999.) ; pc.defineInt("splitRange","SplitRange",0,0) ; @@ -1067,6 +1078,14 @@ RooAbsReal* RooAbsPdf::createNLL(RooAbsData& data, const RooLinkedList& cmdList) projDeps.add(*tmp) ; } + const std::string globalObservablesSource = pc.getString("globssource","data",false); + if(globalObservablesSource != "data" && globalObservablesSource != "model") { + std::string errMsg = "RooAbsPdf::fitTo: GlobalObservablesSource can only be \"data\" or \"model\"!"; + coutE(InputArguments) << errMsg << std::endl; + throw std::invalid_argument(errMsg); + } + const bool takeGlobalObservablesFromData = globalObservablesSource == "data"; + // Construct NLL RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CollectErrors) ; RooAbsReal* nll ; @@ -1080,6 +1099,7 @@ RooAbsReal* RooAbsPdf::createNLL(RooAbsData& data, const RooLinkedList& cmdList) cfg.cloneInputData = static_cast(cloneData); cfg.integrateOverBinsPrecision = pc.getDouble("IntegrateBins"); cfg.binnedL = false; + cfg.takeGlobalObservablesFromData = takeGlobalObservablesFromData; if (!rangeName || strchr(rangeName,',')==0) { // Simple case: default range, or single restricted range //cout<<"FK: Data test 1: "< rw(minimizer.save()); @@ -1216,7 +1248,20 @@ int RooAbsPdf::calculateAsymptoticCorrectedCovMatrix(RooMinimizer &minimizer, Ro return rw->covQual(); } -int RooAbsPdf::calculateSumW2CorrectedCovMatrix(RooMinimizer &minimizer, RooAbsReal const &nll) const + +//////////////////////////////////////////////////////////////////////////////// +/// Apply correction to errors and covariance matrix. This uses two covariance +/// matrices, one with the weights, the other with squared weights, to obtain +/// the correct errors for weighted likelihood fits. +/// Applies the calculated covaraince matrix to the RooMinimizer and returns +/// the quality of the covariance matrix. +/// See also the documentation of RooAbsPdf::fitTo(), where this function is used. +/// \param[in] minimizer The RooMinimizer to get the fit result from. The state +/// of the minimizer will be altered by this function: the covariance +/// matrix caltulated here will be applied to it via +/// RooMinimizer::applyCovarianceMatrix(). +/// \param[in] nll The NLL object that was used for the fit. +int RooAbsPdf::calcSumW2CorrectedCovariance(RooMinimizer &minimizer, RooAbsReal const &nll) const { // Make list of RooNLLVar components of FCN @@ -1420,6 +1465,101 @@ RooFitResult* RooAbsPdf::fitTo(RooAbsData& data, const RooCmdArg& arg1, const Ro } +//////////////////////////////////////////////////////////////////////////////// +/// Minimizes a given NLL variable by finding the optimal parameters with the +/// RooMinimzer. The NLL variable can be created with RooAbsPdf::createNLL. +/// If you are looking for a function that combines likelihood creation with +/// fitting, see RooAbsPdf::fitTo. +/// \param[in] nll The negative log-likelihood variable to minimize. +/// \param[in] data The dataset that was als used for the NLL. It's a necessary +/// parameter because it is used in the asymptotic error correction. +/// \param[in] cfg Configuration struct with all the configuration options for +/// the RooMinimizer. These are a subset of the options that you can +/// also pass to RooAbsPdf::fitTo via the RooFit command arguments. +std::unique_ptr RooAbsPdf::minimizeNLL(RooAbsReal & nll, + RooAbsData const& data, MinimizerConfig const& cfg) { + + // Determine if the dataset has weights + bool weightedData = data.isNonPoissonWeighted(); + + // Warn user that a method to determine parameter uncertainties should be provided if weighted data is offered + if (weightedData && cfg.doSumW2==-1 && cfg.doAsymptotic==-1) { + coutW(InputArguments) << "RooAbsPdf::fitTo(" << GetName() << ") WARNING: a likelihood fit is requested of what appears to be weighted data.\n" + << " While the estimated values of the parameters will always be calculated taking the weights into account,\n" + << " there are multiple ways to estimate the errors of the parameters. You are advised to make an \n" + << " explicit choice for the error calculation:\n" + << " - Either provide SumW2Error(true), to calculate a sum-of-weights-corrected HESSE error matrix\n" + << " (error will be proportional to the number of events in MC).\n" + << " - Or provide SumW2Error(false), to return errors from original HESSE error matrix\n" + << " (which will be proportional to the sum of the weights, i.e., a dataset with events).\n" + << " - Or provide AsymptoticError(true), to use the asymptotically correct expression\n" + << " (for details see https://arxiv.org/abs/1911.01303)." + << endl ; + } + + if (cfg.minos && (cfg.doSumW2==1 || cfg.doAsymptotic == 1)) { + coutE(InputArguments) << "RooAbsPdf::fitTo(" << GetName() << "): sum-of-weights and asymptotic error correction do not work with MINOS errors. Not fitting." << endl; + return nullptr; + } + if (cfg.doAsymptotic==1 && cfg.minos) { + coutW(InputArguments) << "RooAbsPdf::fitTo(" << GetName() << ") WARNING: asymptotic correction does not apply to MINOS errors" << endl ; + } + + //avoid setting both SumW2 and Asymptotic for uncertainty correction + if (cfg.doSumW2==1 && cfg.doAsymptotic==1) { + coutE(InputArguments) << "RooAbsPdf::fitTo(" << GetName() << ") ERROR: Cannot compute both asymptotically correct and SumW2 errors." << endl ; + return nullptr; + } + + // Instantiate RooMinimizer + + RooMinimizer m(nll); + m.setMinimizerType(cfg.minType.c_str()); + m.setEvalErrorWall(cfg.doEEWall); + m.setRecoverFromNaNStrength(cfg.recoverFromNaN); + m.setPrintEvalErrors(cfg.numee); + if (cfg.printLevel!=1) m.setPrintLevel(cfg.printLevel); + if (cfg.optConst) m.optimizeConst(cfg.optConst); // Activate constant term optimization + + if (!cfg.fitOpt.empty()) { + + // Play fit options as historically defined + std::unique_ptr ret{m.fit(cfg.fitOpt.c_str())}; + if (cfg.optConst) m.optimizeConst(0) ; + return ret; + + } + + if (cfg.verbose) m.setVerbose(1); // Activate verbose options + if (cfg.doTimer) m.setProfile(1); // Activate timer options + if (cfg.strat!=1) m.setStrategy(cfg.strat); // Modify fit strategy + if (cfg.initHesse) m.hesse(); // Initialize errors with hesse + m.minimize(cfg.minType.c_str(), cfg.minAlg.c_str()); // Minimize using chosen algorithm + if (cfg.hesse) m.hesse(); // Evaluate errors with Hesse + + int corrCovQual = -1; + + if (m.getNPar()>0) { + if (cfg.doAsymptotic == 1) corrCovQual = calcAsymptoticCorrectedCovariance(m, data); // Asymptotically correct + if (cfg.doSumW2 == 1) corrCovQual = calcSumW2CorrectedCovariance(m, nll); + } + + if (cfg.minos) cfg.minosSet ? m.minos(*cfg.minosSet) : m.minos(); // Evaluate errs with Minos + + // Optionally return fit result + std::unique_ptr ret; + if (cfg.doSave) { + auto name = std::string("fitresult_") + GetName() + "_" + data.GetName(); + auto title = std::string("Result of fit of p.d.f. ") + GetName() + " to dataset " + data.GetName(); + ret.reset(m.save(name.c_str(),title.c_str())); + if((cfg.doSumW2==1 || cfg.doAsymptotic==1) && m.getNPar()>0) ret->setCovQual(corrCovQual); + } + + if (cfg.optConst) m.optimizeConst(0) ; + return ret ; +} + + //////////////////////////////////////////////////////////////////////////////// /// Fit PDF to given dataset. If dataset is unbinned, an unbinned maximum likelihood is performed. If the dataset @@ -1438,31 +1578,36 @@ RooFitResult* RooAbsPdf::fitTo(RooAbsData& data, const RooLinkedList& cmdList) RooLinkedList fitCmdList(cmdList) ; RooLinkedList nllCmdList = pc.filterCmdList(fitCmdList,"ProjectedObservables,Extended,Range," "RangeWithName,SumCoefRange,NumCPU,SplitRange,Constrained,Constrain,ExternalConstraints," - "CloneData,GlobalObservables,GlobalObservablesTag,OffsetLikelihood,BatchMode,IntegrateBins"); + "CloneData,GlobalObservables,GlobalObservablesSource,GlobalObservablesTag,OffsetLikelihood," + "BatchMode,IntegrateBins"); + + // Default-initialized instance of MinimizerConfig to get the default + // minimizer parameter values. + MinimizerConfig minimizerDefaults; pc.defineDouble("prefit", "Prefit",0,0); - pc.defineDouble("RecoverFromUndefinedRegions", "RecoverFromUndefinedRegions",0,10.); - pc.defineString("fitOpt","FitOptions",0,"") ; - pc.defineInt("optConst","Optimize",0,2) ; - pc.defineInt("verbose","Verbose",0,0) ; - pc.defineInt("doSave","Save",0,0) ; - pc.defineInt("doTimer","Timer",0,0) ; - pc.defineInt("plevel","PrintLevel",0,1) ; - pc.defineInt("strat","Strategy",0,1) ; - pc.defineInt("initHesse","InitialHesse",0,0) ; - pc.defineInt("hesse","Hesse",0,1) ; - pc.defineInt("minos","Minos",0,0) ; + pc.defineDouble("RecoverFromUndefinedRegions", "RecoverFromUndefinedRegions",0,minimizerDefaults.recoverFromNaN); + pc.defineString("fitOpt","FitOptions",0,minimizerDefaults.fitOpt.c_str()) ; + pc.defineInt("optConst","Optimize",0,minimizerDefaults.optConst) ; + pc.defineInt("verbose","Verbose",0,minimizerDefaults.verbose) ; + pc.defineInt("doSave","Save",0,minimizerDefaults.doSave) ; + pc.defineInt("doTimer","Timer",0,minimizerDefaults.doTimer) ; + pc.defineInt("printLevel","PrintLevel",0,minimizerDefaults.printLevel) ; + pc.defineInt("strat","Strategy",0,minimizerDefaults.strat) ; + pc.defineInt("initHesse","InitialHesse",0,minimizerDefaults.initHesse) ; + pc.defineInt("hesse","Hesse",0,minimizerDefaults.hesse) ; + pc.defineInt("minos","Minos",0,minimizerDefaults.minos) ; pc.defineInt("ext","Extended",0,2) ; pc.defineInt("numcpu","NumCPU",0,1) ; - pc.defineInt("numee","PrintEvalErrors",0,10) ; - pc.defineInt("doEEWall","EvalErrorWall",0,1) ; - pc.defineInt("doWarn","Warnings",0,1) ; - pc.defineInt("doSumW2","SumW2Error",0,-1) ; - pc.defineInt("doAsymptoticError","AsymptoticError",0,-1) ; + pc.defineInt("numee","PrintEvalErrors",0,minimizerDefaults.numee) ; + pc.defineInt("doEEWall","EvalErrorWall",0,minimizerDefaults.doEEWall) ; + pc.defineInt("doWarn","Warnings",0,minimizerDefaults.doWarn) ; + pc.defineInt("doSumW2","SumW2Error",0,minimizerDefaults.doSumW2) ; + pc.defineInt("doAsymptoticError","AsymptoticError",0,minimizerDefaults.doAsymptotic) ; pc.defineInt("doOffset","OffsetLikelihood",0,0) ; - pc.defineString("mintype","Minimizer",0,"Minuit") ; - pc.defineString("minalg","Minimizer",1,"minuit") ; - pc.defineObject("minosSet","Minos",0,0) ; + pc.defineString("mintype","Minimizer",0,minimizerDefaults.minType.c_str()) ; + pc.defineString("minalg","Minimizer",1,minimizerDefaults.minAlg.c_str()) ; + pc.defineObject("minosSet","Minos",0,minimizerDefaults.minosSet) ; pc.defineSet("cPars","Constrain",0,0) ; pc.defineSet("extCons","ExternalConstraints",0,0) ; pc.defineMutex("FitOptions","Verbose") ; @@ -1483,25 +1628,7 @@ RooFitResult* RooAbsPdf::fitTo(RooAbsData& data, const RooLinkedList& cmdList) // Decode command line arguments Double_t prefit = pc.getDouble("prefit"); - const double recoverFromNaN = pc.getDouble("RecoverFromUndefinedRegions"); - const char* fitOpt = pc.getString("fitOpt",0,kTRUE) ; Int_t optConst = pc.getInt("optConst") ; - Int_t verbose = pc.getInt("verbose") ; - Int_t doSave = pc.getInt("doSave") ; - Int_t doTimer = pc.getInt("doTimer") ; - Int_t plevel = pc.getInt("plevel") ; - Int_t strat = pc.getInt("strat") ; - Int_t initHesse= pc.getInt("initHesse") ; - Int_t hesse = pc.getInt("hesse") ; - Int_t minos = pc.getInt("minos") ; - Int_t numee = pc.getInt("numee") ; - Int_t doEEWall = pc.getInt("doEEWall") ; - Int_t doWarn = pc.getInt("doWarn") ; - Int_t doSumW2 = pc.getInt("doSumW2") ; - Int_t doAsymptotic = pc.getInt("doAsymptoticError"); - const RooArgSet* minosSet = static_cast(pc.getObject("minosSet")) ; - const char* minType = pc.getString("mintype","Minuit") ; - const char* minAlg = pc.getString("minalg","minuit") ; if (optConst > 1) { // optConst >= 2 is pre-computating values, which are never used when @@ -1516,33 +1643,6 @@ RooFitResult* RooAbsPdf::fitTo(RooAbsData& data, const RooLinkedList& cmdList) } } - - // Determine if the dataset has weights - Bool_t weightedData = data.isNonPoissonWeighted() ; - - // Warn user that a method to determine parameter uncertainties should be provided if weighted data is offered - if (weightedData && doSumW2==-1 && doAsymptotic==-1) { - coutW(InputArguments) << "RooAbsPdf::fitTo(" << GetName() << ") WARNING: a likelihood fit is requested of what appears to be weighted data.\n" - << " While the estimated values of the parameters will always be calculated taking the weights into account,\n" - << " there are multiple ways to estimate the errors of the parameters. You are advised to make an \n" - << " explicit choice for the error calculation:\n" - << " - Either provide SumW2Error(true), to calculate a sum-of-weights-corrected HESSE error matrix\n" - << " (error will be proportional to the number of events in MC).\n" - << " - Or provide SumW2Error(false), to return errors from original HESSE error matrix\n" - << " (which will be proportional to the sum of the weights, i.e., a dataset with events).\n" - << " - Or provide AsymptoticError(true), to use the asymptotically correct expression\n" - << " (for details see https://arxiv.org/abs/1911.01303)." - << endl ; - } - - if (minos && (doSumW2==1 || doAsymptotic == 1)) { - coutE(InputArguments) << "RooAbsPdf::fitTo(" << GetName() << "): sum-of-weights and asymptotic error correction do not work with MINOS errors. Not fitting." << endl; - return nullptr; - } - if (doAsymptotic==1 && minos) { - coutW(InputArguments) << "RooAbsPdf::fitTo(" << GetName() << ") WARNING: asymptotic correction does not apply to MINOS errors" << endl ; - } - if (prefit != 0) { size_t nEvents = static_cast(prefit*data.numEntries()); if (prefit > 0.5 || nEvents < 100) { @@ -1578,111 +1678,30 @@ RooFitResult* RooAbsPdf::fitTo(RooAbsData& data, const RooLinkedList& cmdList) } } - RooAbsReal* nll = createNLL(data,nllCmdList) ; - RooFitResult *ret = 0 ; - - //avoid setting both SumW2 and Asymptotic for uncertainty correction - if (doSumW2==1 && doAsymptotic==1) { - coutE(InputArguments) << "RooAbsPdf::fitTo(" << GetName() << ") ERROR: Cannot compute both asymptotically correct and SumW2 errors." << endl ; - return ret; - } - - // Instantiate MINUIT - - RooMinimizer m(*nll) ; - - m.setMinimizerType(minType) ; - - m.setEvalErrorWall(doEEWall) ; - m.setRecoverFromNaNStrength(recoverFromNaN); - if (doWarn==0) { - // m.setNoWarn() ; WVE FIX THIS - } - - m.setPrintEvalErrors(numee) ; - if (plevel!=1) { - m.setPrintLevel(plevel) ; - } - - if (optConst) { - // Activate constant term optimization - m.optimizeConst(optConst) ; - } - - if (fitOpt) { - - // Play fit options as historically defined - ret = m.fit(fitOpt) ; - - } else { - - if (verbose) { - // Activate verbose options - m.setVerbose(1) ; - } - if (doTimer) { - // Activate timer options - m.setProfile(1) ; - } - - if (strat!=1) { - // Modify fit strategy - m.setStrategy(strat) ; - } - - if (initHesse) { - // Initialize errors with hesse - m.hesse() ; - } - - // Minimize using chosen algorithm - m.minimize(minType,minAlg) ; - - if (hesse) { - // Evaluate errors with Hesse - m.hesse() ; - } - - int corrCovQual = -1; - - //asymptotically correct approach - if (doAsymptotic==1 && m.getNPar()>0) { - corrCovQual = calculateAsymptoticCorrectedCovMatrix(m, data); - } - - if (doSumW2==1 && m.getNPar()>0) { - corrCovQual = calculateSumW2CorrectedCovMatrix(m, *nll); - } - - if (minos) { - // Evaluate errs with Minos - if (minosSet) { - m.minos(*minosSet) ; - } else { - m.minos() ; - } - } - - // Optionally return fit result - if (doSave) { - string name = Form("fitresult_%s_%s",GetName(),data.GetName()) ; - string title = Form("Result of fit of p.d.f. %s to dataset %s",GetName(),data.GetName()) ; - ret = m.save(name.c_str(),title.c_str()) ; - if((doSumW2==1 || doAsymptotic==1) && m.getNPar()>0) { - ret->setCovQual(corrCovQual); - } - } - - } - - if (optConst) { - m.optimizeConst(0) ; - } - - - // Cleanup - delete nll ; - return ret ; + std::unique_ptr nll{createNLL(data,nllCmdList)}; + + MinimizerConfig cfg; + cfg.recoverFromNaN = pc.getDouble("RecoverFromUndefinedRegions"); + cfg.fitOpt = pc.getString("fitOpt",0,true) ? pc.getString("fitOpt",0,true) : ""; + cfg.optConst = optConst; + cfg.verbose = pc.getInt("verbose"); + cfg.doSave = pc.getInt("doSave"); + cfg.doTimer = pc.getInt("doTimer"); + cfg.printLevel = pc.getInt("printLevel"); + cfg.strat = pc.getInt("strat"); + cfg.initHesse = pc.getInt("initHesse"); + cfg.hesse = pc.getInt("hesse"); + cfg.minos = pc.getInt("minos"); + cfg.numee = pc.getInt("numee"); + cfg.doEEWall = pc.getInt("doEEWall"); + cfg.doWarn = pc.getInt("doWarn"); + cfg.doSumW2 = pc.getInt("doSumW2"); + cfg.doAsymptotic = pc.getInt("doAsymptoticError"); + cfg.minosSet = static_cast(pc.getObject("minosSet")); + cfg.minType = pc.getString("mintype","Minuit"); + cfg.minAlg = pc.getString("minalg","minuit"); + + return minimizeNLL(*nll, data, cfg).release(); } diff --git a/roofit/roofitcore/src/RooAbsTestStatistic.cxx b/roofit/roofitcore/src/RooAbsTestStatistic.cxx index 2c4f465cdcdd3..e3785db99c8e5 100644 --- a/roofit/roofitcore/src/RooAbsTestStatistic.cxx +++ b/roofit/roofitcore/src/RooAbsTestStatistic.cxx @@ -100,7 +100,8 @@ RooAbsTestStatistic::RooAbsTestStatistic(const char *name, const char *title, Ro _gofOpMode{(cfg.nCPU>1 || cfg.nCPU==-1) ? MPMaster : (dynamic_cast(_func) ? SimMaster : Slave)}, _nEvents{data.numEntries()}, _nCPU(cfg.nCPU != -1 ? cfg.nCPU : 1), - _mpinterl(cfg.interleave) + _mpinterl(cfg.interleave), + _takeGlobalObservablesFromData{cfg.takeGlobalObservablesFromData} { // Register all parameters as servers _paramSet.add(*std::unique_ptr{real.getParameters(&data)}); @@ -138,6 +139,7 @@ RooAbsTestStatistic::RooAbsTestStatistic(const RooAbsTestStatistic& other, const _nCPU(other._nCPU != -1 ? other._nCPU : 1), _mpinterl(other._mpinterl), _doOffset(other._doOffset), + _takeGlobalObservablesFromData{other._takeGlobalObservablesFromData}, _offset(other._offset), _evalCarry(other._evalCarry) { @@ -412,6 +414,7 @@ void RooAbsTestStatistic::initMPMode(RooAbsReal* real, RooAbsData* data, const R cfg.interleave = _mpinterl; cfg.verbose = _verbose; cfg.splitCutRange = _splitRange; + cfg.takeGlobalObservablesFromData = _takeGlobalObservablesFromData; // This configuration parameter is stored in the RooAbsOptTestStatistic. // It would have been cleaner to move the member variable into RooAbsTestStatistic, // but to avoid incrementing the class version we do the dynamic_cast trick. @@ -524,6 +527,7 @@ void RooAbsTestStatistic::initSimMode(RooSimultaneous* simpdf, RooAbsData* data, cfg.verbose = _verbose; cfg.splitCutRange = _splitRange; cfg.binnedL = binnedL; + cfg.takeGlobalObservablesFromData = _takeGlobalObservablesFromData; // This configuration parameter is stored in the RooAbsOptTestStatistic. // It would have been cleaner to move the member variable into RooAbsTestStatistic, // but to avoid incrementing the class version we do the dynamic_cast trick. @@ -601,20 +605,19 @@ Bool_t RooAbsTestStatistic::setData(RooAbsData& indata, Bool_t cloneData) return setDataSlave(indata, cloneData); case SimMaster: // Forward to slaves - // cout << "RATS::setData(" << GetName() << ") SimMaster, calling setDataSlave() on slave nodes" << endl; if (indata.canSplitFast()) { - for (Int_t i = 0; i < _nGof; ++i) { - RooAbsData* compData = indata.getSimData(_gofArray[i]->GetName()); - _gofArray[i]->setDataSlave(*compData, cloneData); + for (int i = 0; i < _nGof; ++i) { + RooAbsData* compData = indata.getSimData(_gofArray[i]->GetName()); + _gofArray[i]->setDataSlave(*compData, cloneData); } } else if (0 == indata.numEntries()) { // For an unsplit empty dataset, simply assign empty dataset to each component - for (Int_t i = 0; i < _nGof; ++i) { - _gofArray[i]->setDataSlave(indata, cloneData); + for (int i = 0; i < _nGof; ++i) { + _gofArray[i]->setDataSlave(indata, cloneData); } } else { const RooAbsCategoryLValue& indexCat = static_cast(_func)->indexCat(); - TList* dlist = indata.split(indexCat, kTRUE); + std::unique_ptr dlist{indata.split(indexCat, true)}; if (!dlist) { coutF(DataHandling) << "Tried to split '" << indata.GetName() << "' into categories of '" << indexCat.GetName() << "', but splitting failed. Input data:" << std::endl; @@ -622,16 +625,13 @@ Bool_t RooAbsTestStatistic::setData(RooAbsData& indata, Bool_t cloneData) throw std::runtime_error("Error when setting up test statistic: dataset couldn't be split into categories."); } - for (Int_t i = 0; i < _nGof; ++i) { - RooAbsData* compData = (RooAbsData*) dlist->FindObject(_gofArray[i]->GetName()); - // cout << "component data for index " << _gofArray[i]->GetName() << " is " << compData << endl; - if (compData) { + for (int i = 0; i < _nGof; ++i) { + if (auto compData = static_cast(dlist->FindObject(_gofArray[i]->GetName()))) { _gofArray[i]->setDataSlave(*compData,kFALSE,kTRUE); } else { coutE(DataHandling) << "RooAbsTestStatistic::setData(" << GetName() << ") ERROR: Cannot find component data for state " << _gofArray[i]->GetName() << endl; } } - delete dlist; // delete only list, data will be used } break; case MPMaster: @@ -641,7 +641,7 @@ Bool_t RooAbsTestStatistic::setData(RooAbsData& indata, Bool_t cloneData) break; } - return kTRUE; + return true; } diff --git a/roofit/roofitcore/src/RooConstraintSum.cxx b/roofit/roofitcore/src/RooConstraintSum.cxx index 89f602b1f8155..0cdb6372ac75f 100644 --- a/roofit/roofitcore/src/RooConstraintSum.cxx +++ b/roofit/roofitcore/src/RooConstraintSum.cxx @@ -28,6 +28,7 @@ arguments. #include "RooConstraintSum.h" +#include "RooAbsData.h" #include "RooAbsReal.h" #include "RooAbsPdf.h" #include "RooErrorHandler.h" @@ -35,6 +36,8 @@ arguments. #include "RooMsgService.h" #include "RooHelpers.h" #include "RooWorkspace.h" +#include "RooAbsRealLValue.h" +#include "RooAbsCategoryLValue.h" #include @@ -44,10 +47,11 @@ ClassImp(RooConstraintSum); //////////////////////////////////////////////////////////////////////////////// /// Constructor with set of constraint p.d.f.s. All elements in constraintSet must inherit from RooAbsPdf. -RooConstraintSum::RooConstraintSum(const char* name, const char* title, const RooArgSet& constraintSet, const RooArgSet& normSet) : +RooConstraintSum::RooConstraintSum(const char* name, const char* title, const RooArgSet& constraintSet, const RooArgSet& normSet, bool takeGlobalObservablesFromData) : RooAbsReal(name, title), _set1("set1","First set of components",this), - _paramSet("paramSet","Set of parameters",this) + _paramSet("paramSet","Set of parameters",this), + _takeGlobalObservablesFromData{takeGlobalObservablesFromData} { for (const auto comp : constraintSet) { if (!dynamic_cast(comp)) { @@ -68,7 +72,8 @@ RooConstraintSum::RooConstraintSum(const char* name, const char* title, const Ro RooConstraintSum::RooConstraintSum(const RooConstraintSum& other, const char* name) : RooAbsReal(other, name), _set1("set1",this,other._set1), - _paramSet("paramSet",this,other._paramSet) + _paramSet("paramSet",this,other._paramSet), + _takeGlobalObservablesFromData{other._takeGlobalObservablesFromData} { } @@ -88,6 +93,19 @@ Double_t RooConstraintSum::evaluate() const } +//////////////////////////////////////////////////////////////////////////////// +/// Replace the variables in this RooConstraintSum with the global observables +/// in the dataset if they match by name. This function will do nothing if this +/// RooConstraintSum is configured to not use the global observables stored in +/// datasets. +bool RooConstraintSum::setData(RooAbsData const& data, bool /*cloneData=true*/) { + if(_takeGlobalObservablesFromData && data.getGlobalObservables()) { + this->recursiveRedirectServers(*data.getGlobalObservables()) ; + } + return true; +} + + namespace { std::unique_ptr getGlobalObservables( @@ -155,8 +173,10 @@ RooArgSet const* tryToGetConstraintSetFromWorkspace( /// \param[in] pdf The pdf model whose parameters should be constrained. /// Constraint terms will be extracted from RooProdPdf instances /// that are servers of the pdf (internal constraints). -/// \param[in] observables Observable variables (used to determine which model -/// variables are parameters). +/// \param[in] data Dataset used in the fit with the constraint sum. It is +/// used to figure out which are the observables and also to get the +/// global observables definition and values if they are stored in +/// the dataset. /// \param[in] constraints Set of parameters to constrain. If `nullptr`, all /// parameters will be considered. /// \param[in] externalConstraints Set of constraint terms that are not @@ -170,17 +190,26 @@ RooArgSet const* tryToGetConstraintSetFromWorkspace( /// used. The `globalObservables` and `globalObservablesTag` /// parameters are mutually exclusive, meaning at least one of them /// has to be `nullptr`. +/// \param[in] takeGlobalObservablesFromData If the dataset should be used to automatically +/// define the set of global observables. If this is the case and the +/// set of global observables is still defined manually with the +/// `globalObservables` or `globalObservablesTag` parameters, the +/// values of all global observables that are not stored in the +/// dataset are taken from the model. /// \param[in] workspace RooWorkspace to cache the set of constraints. std::unique_ptr RooConstraintSum::createConstraintTerm( std::string const& name, RooAbsPdf const& pdf, - RooArgSet const& observables, + RooAbsData const& data, RooArgSet const* constrainedParameters, RooArgSet const* externalConstraints, RooArgSet const* globalObservables, const char* globalObservablesTag, + bool takeGlobalObservablesFromData, RooWorkspace * workspace) { + RooArgSet const& observables = *data.get(); + bool doStripDisconnected = false ; // If no explicit list of parameters to be constrained is specified apply default algorithm @@ -221,17 +250,58 @@ std::unique_ptr RooConstraintSum::createConstraintTerm( } } - auto glObs = getGlobalObservables(pdf, globalObservables, globalObservablesTag); - if (!allConstraints.empty()) { - oocoutI(&pdf, Minimization) << " Including the following constraint terms in minimization: " << allConstraints << std::endl ; - if (glObs) { - oocoutI(&pdf, Minimization) << "The following global observables have been defined: " << *glObs << std::endl ; + oocoutI(&pdf, Minimization) << " Including the following constraint terms in minimization: " + << allConstraints << std::endl ; + + // Identify global observables in the model. + auto glObs = getGlobalObservables(pdf, globalObservables, globalObservablesTag); + if(data.getGlobalObservables() && takeGlobalObservablesFromData) { + if(!glObs) { + // There were no global observables specified, but there are some in the + // dataset. We will just take them from the dataset. + oocoutI(&pdf, Minimization) + << "The following global observables have been automatically defined according to the dataset " + << "which also provides their values: " << *data.getGlobalObservables() << std::endl; + glObs = std::make_unique(*data.getGlobalObservables()); + } else { + // There are global observables specified by the user and also some in + // the dataset. + RooArgSet globalsFromDataset; + data.getGlobalObservables()->selectCommon(*glObs, globalsFromDataset); + oocoutI(&pdf, Minimization) + << "The following global observables have been defined: " << *glObs + << "," << " with the values of " << globalsFromDataset + << " obtained from the dataset and the other values from the model." << std::endl; + } + } else if(glObs) { + oocoutI(&pdf, Minimization) + << "The following global observables have been defined and their values are taken from the model: " + << *glObs << std::endl; } - auto constraintTerm = std::make_unique(name.c_str(),"nllCons",allConstraints,glObs ? *glObs : cPars) ; - constraintTerm->setOperMode(RooAbsArg::ADirty) ; - return constraintTerm; + + // The constraint terms need to be cloned, because the global observables + // might be changed to have the same values as stored in data. + RooConstraintSum constraintTerm{name.c_str(),"nllCons", allConstraints, glObs ? *glObs : cPars, takeGlobalObservablesFromData}; + std::unique_ptr constraintTermClone{static_cast(constraintTerm.cloneTree())}; + + // The parameters that are not connected to global observables from data + // need to be redirected to the original args to get the changes made by + // the minimizer. This excludes the global observables, where we take the + // clones with the values set to the values from the dataset if available. + RooArgSet allOriginalParams; + constraintTerm.getParameters(nullptr,allOriginalParams); + constraintTermClone->recursiveRedirectServers(allOriginalParams) ; + + // Redirect the global observables to the ones from the dataset if applicable. + static_cast(constraintTermClone.get())->setData(data, false) ; + + // The computation graph for the constraints is very small, no need to do + // the tracking of clean and dirty nodes here. + constraintTermClone->setOperMode(RooAbsArg::ADirty) ; + + return constraintTermClone; } // no constraints diff --git a/roofit/roofitcore/src/RooDataHist.cxx b/roofit/roofitcore/src/RooDataHist.cxx index fa341993d7260..5428c9a8bc5c5 100644 --- a/roofit/roofitcore/src/RooDataHist.cxx +++ b/roofit/roofitcore/src/RooDataHist.cxx @@ -270,6 +270,9 @@ RooDataHist::RooDataHist(std::string_view name, std::string_view title, const Ro /// category it will be added on the fly. The import command can be specified /// multiple times. /// Import(map&) As above, but allows specification of many imports in a single operation +/// `GlobalObservables(const RooArgSet&)` Define the set of global observables to be stored in this RooDataHist. +/// A snapshot of the passed RooArgSet is stored, meaning the values wont't change unexpectedly. +/// /// RooDataHist::RooDataHist(std::string_view name, std::string_view title, const RooArgList& vars, const RooCmdArg& arg1, const RooCmdArg& arg2, const RooCmdArg& arg3, @@ -292,6 +295,7 @@ RooDataHist::RooDataHist(std::string_view name, std::string_view title, const Ro pc.defineDouble("weight","Weight",0,1) ; pc.defineObject("dummy1","ImportDataHistSliceMany",0) ; pc.defineObject("dummy2","ImportHistoSliceMany",0) ; + pc.defineSet("glObs","GlobalObservables",0,0) ; pc.defineMutex("ImportHisto","ImportHistoSlice","ImportDataHistSlice") ; pc.defineDependency("ImportHistoSlice","IndexCat") ; pc.defineDependency("ImportDataHistSlice","IndexCat") ; @@ -309,6 +313,8 @@ RooDataHist::RooDataHist(std::string_view name, std::string_view title, const Ro return ; } + if(pc.getSet("glObs")) setGlobalObservables(*pc.getSet("glObs")); + TH1* impHist = static_cast(pc.getObject("impHist")) ; Bool_t impDens = pc.getInt("impDens") ; Double_t initWgt = pc.getDouble("weight") ; diff --git a/roofit/roofitcore/src/RooDataSet.cxx b/roofit/roofitcore/src/RooDataSet.cxx index 70f3073464e28..ecfa82c289b2e 100644 --- a/roofit/roofitcore/src/RooDataSet.cxx +++ b/roofit/roofitcore/src/RooDataSet.cxx @@ -215,6 +215,8 @@ RooDataSet::RooDataSet() : _wgtVar(0) /// Interpret the given variable as event weight rather than as observable /// StoreError(const RooArgSet&) Store symmetric error along with value for given subset of observables /// StoreAsymError(const RooArgSet&) Store asymmetric error along with value for given subset of observables +/// `GlobalObservables(const RooArgSet&)` Define the set of global observables to be stored in this RooDataSet. +/// A snapshot of the passed RooArgSet is stored, meaning the values wont't change unexpectedly. /// /// @@ -245,6 +247,7 @@ RooDataSet::RooDataSet(std::string_view name, std::string_view title, const RooA pc.defineObject("dummy2","LinkDataSliceMany",0) ; pc.defineSet("errorSet","StoreError",0) ; pc.defineSet("asymErrSet","StoreAsymError",0) ; + pc.defineSet("glObs","GlobalObservables",0,0) ; pc.defineMutex("ImportTree","ImportData","ImportDataSlice","LinkDataSlice","ImportFromFile") ; pc.defineMutex("CutSpec","CutVar") ; pc.defineMutex("WeightVarName","WeightVar") ; @@ -266,6 +269,8 @@ RooDataSet::RooDataSet(std::string_view name, std::string_view title, const RooA return ; } + if(pc.getSet("glObs")) setGlobalObservables(*pc.getSet("glObs")); + // Extract relevant objects TTree* impTree = static_cast(pc.getObject("impTree")) ; RooDataSet* impData = static_cast(pc.getObject("impData")) ; diff --git a/roofit/roofitcore/src/RooGlobalFunc.cxx b/roofit/roofitcore/src/RooGlobalFunc.cxx index 5661e33a44200..5e8ca5558ccf4 100644 --- a/roofit/roofitcore/src/RooGlobalFunc.cxx +++ b/roofit/roofitcore/src/RooGlobalFunc.cxx @@ -222,10 +222,8 @@ namespace RooFit { RooCmdArg SumCoefRange(const char* rangeName) { return RooCmdArg("SumCoefRange",0,0,0,0,rangeName,0,0,0) ; } RooCmdArg Constrain(const RooArgSet& params) { return RooCmdArg("Constrain",0,0,0,0,0,0,0,0,0,0,¶ms) ; } RooCmdArg Constrain(RooArgSet && params) { return Constrain(RooCmdArg::take(std::move(params))); } - RooCmdArg GlobalObservables(const RooArgSet& globs) { return RooCmdArg("GlobalObservables",0,0,0,0,0,0,0,0,0,0,&globs) ; } - RooCmdArg GlobalObservables(RooArgSet && globs) { return GlobalObservables(RooCmdArg::take(std::move(globs))); } + RooCmdArg GlobalObservablesSource(const char* sourceName) { return {"GlobalObservablesSource",0,0,0,0,sourceName,0,0,0}; } RooCmdArg GlobalObservablesTag(const char* tagName) { return RooCmdArg("GlobalObservablesTag",0,0,0,0,tagName,0,0,0) ; } -// RooCmdArg Constrained() { return RooCmdArg("Constrained",kTRUE,0,0,0,0,0,0,0) ; } RooCmdArg ExternalConstraints(const RooArgSet& cpdfs) { return RooCmdArg("ExternalConstraints",0,0,0,0,0,0,&cpdfs,0,0,0,&cpdfs) ; } RooCmdArg ExternalConstraints(RooArgSet && cpdfs) { return ExternalConstraints(RooCmdArg::take(std::move(cpdfs))); } RooCmdArg PrintEvalErrors(Int_t numErrors) { return RooCmdArg("PrintEvalErrors",numErrors,0,0,0,0,0,0,0) ; } diff --git a/roofit/roofitcore/test/CMakeLists.txt b/roofit/roofitcore/test/CMakeLists.txt index d64fd2252bbc8..4e2e833eb70e1 100644 --- a/roofit/roofitcore/test/CMakeLists.txt +++ b/roofit/roofitcore/test/CMakeLists.txt @@ -44,6 +44,6 @@ ROOT_ADD_GTEST(testRooProductPdf testRooProductPdf.cxx LIBRARIES RooFitCore) ROOT_ADD_GTEST(testNaNPacker testNaNPacker.cxx LIBRARIES RooFitCore) ROOT_ADD_GTEST(testRooSimultaneous testRooSimultaneous.cxx LIBRARIES RooFitCore RooFit) ROOT_ADD_GTEST(testRooGradMinimizerFcn testRooGradMinimizerFcn.cxx LIBRARIES RooFitCore) - ROOT_ADD_GTEST(testLikelihoodSerial TestStatistics/testLikelihoodSerial.cxx LIBRARIES RooFitCore RooFit) ROOT_ADD_GTEST(testRooRealL TestStatistics/RooRealL.cpp LIBRARIES RooFitCore RooFit) +ROOT_ADD_GTEST(testGlobalObservables testGlobalObservables.cxx LIBRARIES RooFit) diff --git a/roofit/roofitcore/test/testGlobalObservables.cxx b/roofit/roofitcore/test/testGlobalObservables.cxx new file mode 100644 index 0000000000000..fdf4799cf1a04 --- /dev/null +++ b/roofit/roofitcore/test/testGlobalObservables.cxx @@ -0,0 +1,432 @@ +// Tests for global observables +// Authors: Jonas Rembser, CERN 08/2021 + +#include "RooRealVar.h" +#include "RooMsgService.h" +#include "RooGaussian.h" +#include "RooPolynomial.h" +#include "RooAddPdf.h" +#include "RooDataSet.h" +#include "RooProdPdf.h" +#include "RooFitResult.h" +#include "RooConstVar.h" +#include "RooPoisson.h" +#include "RooProduct.h" +#include "RooWorkspace.h" + +#include "gtest/gtest.h" + +#include +#include + +namespace { + +// Helper function to check if two RooFitResults are not identical. +// We can't use RooFitResult::isIdentical() here, because it will print +// something when the comparison fails even with verbose set to false. +bool isNotIdentical(RooFitResult const &res1, RooFitResult const &res2) +{ + std::size_t n = res1.floatParsFinal().size(); + if (n != res2.floatParsFinal().size()) + return true; + for (std::size_t i = 0; i < n; ++i) { + if (static_cast(res1.floatParsFinal()[i]).getVal() != + static_cast(res2.floatParsFinal()[i]).getVal()) + return true; + } + return false; +} + +} // namespace + +// Test environment to verify that if we use the feature of storing global +// observables in a RooDataSet, we can reproduce the same fit results as when +// we track the global observables separately. +class TestGlobalObservables : public ::testing::Test { +public: + void SetUp() override + { + // silence log output + RooMsgService::instance().setGlobalKillBelow(RooFit::WARNING); + + // observables + RooRealVar x("x", "x", 0.0, 0.0, 20.0); + + // global observables, always constant in fits + RooRealVar gm("gm", "gm", 11.0, 0.0, 20.0); + RooRealVar gs("gs", "gs", 1.0, 0.1, 10.0); + gm.setConstant(true); + gs.setConstant(true); + + // constrained parameters + RooRealVar f("f", "f", 0.5, 0.0, 1.0); + + // other parameters + RooRealVar m("m", "m", 10.0, 0.0, 20.0); + RooRealVar s("s", "s", 2.0, 0.1, 10.0); + + // We use the global observable also in the model for the event + // observables. It's unusual, but let's better do this to also cover the + // corner case where the global observable is not only part of the + // constraint term. + RooProduct sigma{"sigma", "sigma", {s, gs}}; + + // build the unconstrained model + RooGaussian model("model", "model", x, m, sigma); + + // the constraint pdfs, they are RooPoisson so we can't have tests that accidentally + // pass because of the symmetry of normalizing over x or mu + RooPoisson mconstraint("mconstraint", "mconstraint", gm, m); + RooPoisson sconstraint("sconstraint", "sconstraint", gs, s); + + // the model multiplied with the constraint term + RooProdPdf modelc("modelc", "modelc", RooArgSet(model, mconstraint, sconstraint)); + + // generate small dataset for use in fitting below, also cloned versions + // with one or two global observables attached + _data.reset(model.generate(x, 50)); + + _dataWithMeanSigmaGlobs.reset( + static_cast(_data->Clone((std::string(_data->GetName()) + "_gm_gs").c_str()))); + _dataWithMeanSigmaGlobs->setGlobalObservables({gm, gs}); + + _dataWithMeanGlob.reset(static_cast(_data->Clone((std::string(_data->GetName()) + "_gm").c_str()))); + _dataWithMeanGlob->setGlobalObservables(gm); + + _workspace = std::make_unique("workspace", "workspace"); + _workspace->import(modelc); + } + + // reset the parameter values to initial values before fits + void resetParameters() + { + std::vector names{"x", "m", "s", "gm", "gs"}; + std::vector values{0.0, 10.0, 2.0, 11.0, 1.0}; + for (std::size_t i = 0; i < names.size(); ++i) { + auto *var = _workspace->var(names[i].c_str()); + var->setVal(values[i]); + var->setError(0.0); + } + } + + RooWorkspace &ws() { return *_workspace; } + RooDataSet &data() { return *_data; } + RooDataSet &dataWithMeanSigmaGlobs() { return *_dataWithMeanSigmaGlobs; } + RooDataSet &dataWithMeanGlob() { return *_dataWithMeanGlob; } + RooAbsPdf &model() { return *ws().pdf("model"); } + RooAbsPdf &modelc() { return *ws().pdf("modelc"); } + + std::unique_ptr doFit(RooAbsPdf &model, RooAbsData &data, RooCmdArg const &arg1 = RooCmdArg::none(), + RooCmdArg const &arg2 = RooCmdArg::none(), + RooCmdArg const &arg3 = RooCmdArg::none(), + RooCmdArg const &arg4 = RooCmdArg::none()) + { + using namespace RooFit; + return std::unique_ptr( + model.fitTo(data, Save(), Verbose(false), PrintLevel(-1), arg1, arg2, arg3, arg4)); + } + + void TearDown() override + { + _workspace.reset(); + _data.reset(); + _dataWithMeanSigmaGlobs.reset(); + _data.reset(); + } + +private: + std::unique_ptr _workspace; + std::unique_ptr _data; + std::unique_ptr _dataWithMeanSigmaGlobs; + std::unique_ptr _dataWithMeanGlob; +}; + +TEST_F(TestGlobalObservables, NoConstraints) +{ + using namespace RooFit; + + auto &gm = *ws().var("gm"); + auto &gs = *ws().var("gs"); + + // fit with no constraints + resetParameters(); + auto res1 = doFit(model(), data()); + resetParameters(); + // vary global observable to verify true value is picked up from the dataset + gm.setVal(gm.getVal() + 0.5); + gs.setVal(gs.getVal() + 2.5); + double gmVaryVal = gm.getVal(); + double gsVaryVal = gs.getVal(); + auto res2 = doFit(model(), dataWithMeanSigmaGlobs()); + EXPECT_TRUE(res1->isIdentical(*res2)) << "fitting an unconstrained model " + "gave a different result when unrelated global observables were stored in " + "the dataset"; + + // verify that taking the global observable values from data has not changed + // the values in the model + { + const auto message = "taking the global observable values from data has changed the values in the model"; + EXPECT_EQ(gmVaryVal, gm.getVal()) << message; + EXPECT_EQ(gsVaryVal, gs.getVal()) << message; + } +} + +TEST_F(TestGlobalObservables, InternalConstrains) +{ + using namespace RooFit; + + auto &gm = *ws().var("gm"); + auto &gs = *ws().var("gs"); + + // constrained fit with RooProdPdf + resetParameters(); + auto res1 = doFit(modelc(), data(), GlobalObservables(gm, gs)); + resetParameters(); + // vary global observable to verify true value is picked up from the dataset + gm.setVal(gm.getVal() + 0.5); + gs.setVal(gs.getVal() + 2.5); + double gmVaryVal = gm.getVal(); + double gsVaryVal = gs.getVal(); + auto res2 = doFit(modelc(), dataWithMeanSigmaGlobs()); + EXPECT_TRUE(res1->isIdentical(*res2)) << "fitting an model with internal " + "constraints in a RooPrdPdf gave a different result when global " + "observables were stored in the dataset"; + + // verify that taking the global observable values from data has not changed + // the values in the model + { + const auto message = "taking the global observable values from data has changed the values in the model"; + EXPECT_EQ(gmVaryVal, gm.getVal()) << message; + EXPECT_EQ(gsVaryVal, gs.getVal()) << message; + } +} + +TEST_F(TestGlobalObservables, ExternalConstraints) +{ + using namespace RooFit; + + auto &gm = *ws().var("gm"); + auto &gs = *ws().var("gs"); + auto &mconstraint = *ws().pdf("mconstraint"); + auto &sconstraint = *ws().pdf("sconstraint"); + + // constrained fit with external constraints + resetParameters(); + auto res1 = doFit(model(), data(), ExternalConstraints({mconstraint, sconstraint}), GlobalObservables(gm, gs)); + resetParameters(); + // vary global observable to verify true value is picked up from the dataset + gm.setVal(gm.getVal() + 0.5); + gs.setVal(gs.getVal() + 2.5); + double gmVaryVal = gm.getVal(); + double gsVaryVal = gs.getVal(); + auto res2 = doFit(model(), dataWithMeanSigmaGlobs(), ExternalConstraints({mconstraint, sconstraint})); + EXPECT_TRUE(res1->isIdentical(*res2)) + << "fitting an model with external " + "constraints passed via ExternalConstraints() gave a different result when global " + "observables were stored in the dataset"; + + // verify that taking the global observable values from data has not changed + // the values in the model + { + const auto message = "taking the global observable values from data has changed the values in the model"; + EXPECT_EQ(gmVaryVal, gm.getVal()) << message; + EXPECT_EQ(gsVaryVal, gs.getVal()) << message; + } +} + +TEST_F(TestGlobalObservables, SubsetOfConstraintsFromData) +{ + using namespace RooFit; + + auto &gm = *ws().var("gm"); + auto &gs = *ws().var("gs"); + + // check if only a subset of constraints it taken from data + resetParameters(); + auto res1 = doFit(modelc(), data(), GlobalObservables(gm, gs)); + resetParameters(); + // Vary global observable to verify true value is picked up from the dataset. + // This time we only get gm from the dataset, so we don't vary gs for now. + gm.setVal(gm.getVal() + 0.5); + double gmVaryVal = gm.getVal(); + double gsVaryVal = gs.getVal(); + // if we take the global observables from the model, they have to be constant: + auto res2 = doFit(modelc(), dataWithMeanGlob(), GlobalObservables(gm, gs)); + EXPECT_TRUE(res1->isIdentical(*res2)) << "fitting a constrained model " + "to a dataset that only stores a subset of the defined global observables " + "gave the wrong result"; + + // verify that taking the global observable values from data has not changed + // the values in the model + { + const auto message = "taking the global observable values from data has changed the values in the model"; + EXPECT_EQ(gmVaryVal, gm.getVal()) << message; + EXPECT_EQ(gsVaryVal, gs.getVal()) << message; + } + + resetParameters(); + // Now that we also vary gs, the fit results should not be identical. + gm.setVal(gm.getVal() + 0.5); + gs.setVal(gs.getVal() + 2.5); + gmVaryVal = gm.getVal(); + gsVaryVal = gs.getVal(); + auto res3 = doFit(modelc(), dataWithMeanGlob(), GlobalObservables(gm, gs)); + EXPECT_TRUE(isNotIdentical(*res1, *res3)) + << "fitting a constrained model " + "to a dataset that only stores a subset of the defined global observables " + "gave the wrong result"; + + // verify that taking the global observable values from data has not changed + // the values in the model + { + const auto message = "taking the global observable values from data has changed the values in the model"; + EXPECT_EQ(gmVaryVal, gm.getVal()) << message; + EXPECT_EQ(gsVaryVal, gs.getVal()) << message; + } +} + +TEST_F(TestGlobalObservables, ResetDataToWrongData) +{ + using namespace RooFit; + + auto &gm = *ws().var("gm"); + auto &gs = *ws().var("gs"); + auto &model = modelc(); + + // constrained fit with RooProdPdf + resetParameters(); + auto res1 = doFit(model, data(), GlobalObservables(gm, gs)); + + resetParameters(); + // vary global observable to deliberately store "wrong" values in a cloned dataset + std::unique_ptr wrongData{static_cast(data().Clone())}; + gm.setVal(gm.getVal() + 0.5); + gs.setVal(gs.getVal() + 2.5); + wrongData->setGlobalObservables({gm, gs}); + + // check that the fit works when using the dataset with the correct values + std::unique_ptr nll{model.createNLL(dataWithMeanSigmaGlobs())}; + RooAbsPdf::MinimizerConfig minimizerCfg; + minimizerCfg.doSave = true; + minimizerCfg.printLevel = -1; + auto res2 = model.minimizeNLL(*nll, dataWithMeanSigmaGlobs(), minimizerCfg); + EXPECT_TRUE(res1->isIdentical(*res2)) << "fitting an model with internal " + "constraints in a RooPrdPdf gave a different result when global " + "observables were stored in the dataset"; + + nll->setData(*wrongData); + resetParameters(); + auto res3 = model.minimizeNLL(*nll, *wrongData, minimizerCfg); + + // If resetting the dataset used for the nll worked correctly also for + // global observables, the fit will now give the wrong result. + EXPECT_TRUE(isNotIdentical(*res1, *res3)) + << "resetting the dataset " + "underlying a RooNLLVar didn't change the global observable value, but it " + "should have"; +} + +TEST_F(TestGlobalObservables, ResetDataToCorrectData) +{ + using namespace RooFit; + + auto &gm = *ws().var("gm"); + auto &gs = *ws().var("gs"); + auto &model = modelc(); + + // constrained fit with RooProdPdf + resetParameters(); + auto res1 = doFit(model, data(), GlobalObservables(gm, gs)); + + resetParameters(); + // vary global observable to deliberately store "wrong" values in a cloned dataset + std::unique_ptr wrongData{static_cast(data().Clone())}; + gm.setVal(gm.getVal() + 0.5); + gs.setVal(gs.getVal() + 2.5); + wrongData->setGlobalObservables({gm, gs}); + resetParameters(); + + // check that the fit doesn't work when using the dataset with the wrong values + std::unique_ptr nll{model.createNLL(*wrongData)}; + RooAbsPdf::MinimizerConfig minimizerCfg; + minimizerCfg.doSave = true; + minimizerCfg.printLevel = -1; + auto res2 = model.minimizeNLL(*nll, *wrongData, minimizerCfg); + EXPECT_TRUE(isNotIdentical(*res1, *res2)) << "fitting an model with internal " + "constraints in a RooPrdPdf ignored the global " + "observables stored in the dataset"; + + nll->setData(dataWithMeanSigmaGlobs()); + resetParameters(); + auto res3 = model.minimizeNLL(*nll, dataWithMeanSigmaGlobs(), minimizerCfg); + EXPECT_TRUE(res1->isIdentical(*res3)) << "resetting the dataset " + "underlying a RooNLLVar didn't change the global observable value, but it " + "should have"; +} + +TEST_F(TestGlobalObservables, GlobalObservablesSourceFromModel) +{ + using namespace RooFit; + + auto &gm = *ws().var("gm"); + auto &gs = *ws().var("gs"); + + // constrained fit with RooProdPdf + resetParameters(); + auto res1 = doFit(modelc(), data(), GlobalObservables(gm, gs)); + resetParameters(); + // vary global observable to verify true value is picked up from the dataset + gm.setVal(gm.getVal() + 0.5); + gs.setVal(gs.getVal() + 2.5); + + // verify that fit results are identical when global observable values are + // taken from data + auto res2 = doFit(modelc(), dataWithMeanSigmaGlobs()); + EXPECT_TRUE(res1->isIdentical(*res2)); + + auto res3 = doFit(modelc(), dataWithMeanSigmaGlobs(), GlobalObservablesSource("model")); + + // If the global observable values are indeed taken from the model and not + // from data, the comparison will fail now because we have changed the + // global observable values of the model after the first fit. + EXPECT_TRUE(isNotIdentical(*res2, *res3)); +} + +TEST_F(TestGlobalObservables, ResetDataButSourceFromModel) +{ + using namespace RooFit; + + auto &gm = *ws().var("gm"); + auto &gs = *ws().var("gs"); + auto &model = modelc(); + + // constrained fit with RooProdPdf + resetParameters(); + auto res1 = doFit(model, data(), GlobalObservables(gm, gs)); + + resetParameters(); + // vary global observable to deliberately store "wrong" values in a cloned dataset + std::unique_ptr wrongData{static_cast(data().Clone())}; + gm.setVal(gm.getVal() + 0.5); + gs.setVal(gs.getVal() + 2.5); + wrongData->setGlobalObservables({gm, gs}); + + resetParameters(); + + // check that the fit works when using the dataset with the correct values + std::unique_ptr nll{ + model.createNLL(dataWithMeanSigmaGlobs(), GlobalObservablesSource("model"), GlobalObservables(gm, gs))}; + RooAbsPdf::MinimizerConfig minimizerCfg; + minimizerCfg.doSave = true; + minimizerCfg.printLevel = -1; + auto res2 = model.minimizeNLL(*nll, dataWithMeanSigmaGlobs(), minimizerCfg); + EXPECT_TRUE(res1->isIdentical(*res2)); + + nll->setData(*wrongData); + resetParameters(); + auto res3 = model.minimizeNLL(*nll, *wrongData, minimizerCfg); + + // this time it should still be identical because even though we reset to + // the wrong data, we set the global observables source to "model" + EXPECT_TRUE(res1->isIdentical(*res3)); +}