diff --git a/documentation/doxygen/converttonotebook.py b/documentation/doxygen/converttonotebook.py index f8cd82ff76337..8a0861baa49d5 100644 --- a/documentation/doxygen/converttonotebook.py +++ b/documentation/doxygen/converttonotebook.py @@ -64,7 +64,7 @@ # for the script to work correctly. gTypesList = ["void", "int", "Int_t", "TF1", "string", "bool", "double", "float", "char", "TCanvas", "TTree", "TString", "TseqCollection", "Double_t", "TFile", "Long64_t", "Bool_t", "TH1", - "RooDataSet", "RooWorkspace"] + "RooDataSet", "RooWorkspace" , "HypoTestInverterResult" , "TVectorD" , "TArrayF"] # ------------------------------------- # -------- Fuction definitions--------- @@ -380,7 +380,7 @@ def split(text): for cpptype in gTypesList: functionReString += ("^%s|") % cpptype - functionReString = functionReString[:-1] + r")\s?\*?\s?[\w:]*?\s?\([^\)]*\)\s*\{.*?^\}" + functionReString = functionReString[:-1] + r")\s?\*?&?\s?[\w:]*?\s?\([^\)]*\)\s*\{.*?^\}" functionRe = re.compile(functionReString, flags = re.DOTALL | re.MULTILINE) #functionre = re.compile(r'(^void|^int|^Int_t|^TF1|^string|^bool|^double|^float|^char|^TCanvas|^TTree|^TString|^TSeqCollection|^Double_t|^TFile|^Long64_t|^Bool_t)\s?\*?\s?[\w:]*?\s?\([^\)]*\)\s*\{.*?^\}', flags = re.DOTALL | re.MULTILINE) @@ -458,7 +458,7 @@ def findFunctionName(text): #functionnamere = re.compile(r'(?<=(?<=int)|(?<=void)|(?<=TF1)|(?<=Int_t)|(?<=string)|(?<=double)|(?<=Double_t)|(?<=float)|(?<=char)|(?<=TString)|(?<=bool)|(?<=TSeqCollection)|(?<=TCanvas)|(?<=TTree)|(?<=TFile)|(?<=Long64_t)|(?<=Bool_t))\s?\*?\s?[^\s]*?(?=\s?\()', flags = re.DOTALL | re.MULTILINE) match = functionNameRe.search(text) - functionname = match.group().strip(" *") + functionname = match.group().strip(" *\n") return functionname @@ -527,8 +527,18 @@ def changeString(matchObject): newcode = re.sub("#\s\s\w\s[\w-]\s\w.*", changeString , code) return newcode +def roostatsRoofitDeclaceNamespace(code): + if "using namespace RooFit;\nusing namespace 
RooStats;" in code: + code = code.replace("using namespace RooFit;\nusing namespace RooStats;", "# \n%%cpp -d\n// This is a workaround to make sure the namespace is used inside functions\nusing namespace RooFit;\nusing namespace RooStats;") + + else: + code = code.replace("using namespace RooFit;", "# \n%%cpp -d\n// This is a workaround to make sure the namespace is used inside functions\nusing namespace RooFit;") + code = code.replace("using namespace RooStats;", "# \n%%cpp -d\n// This is a workaround to make sure the namespace is used inside functions\nusing namespace RooStats;") + return code + + def fixes(code): - codeTransformers=[removePaletteEditor, runEventExe, getLibMathMore, roofitRemoveSpacesComments] + codeTransformers=[removePaletteEditor, runEventExe, getLibMathMore, roofitRemoveSpacesComments, roostatsRoofitDeclaceNamespace] for transformer in codeTransformers: code = transformer(code) diff --git a/tutorials/cont/cnt001_basictseq.C b/tutorials/cont/cnt001_basictseq.C index d1d5b2ee483c4..7a9fae7c38467 100644 --- a/tutorials/cont/cnt001_basictseq.C +++ b/tutorials/cont/cnt001_basictseq.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_cont +/// \notebook -nodraw /// Example showing possible usages of the TSeq class. 
/// \macro_code /// @@ -9,30 +10,30 @@ using namespace ROOT; void cnt001_basictseq() { - cout << "Loop on sequence of integers from 0 to 10" << endl; - for (auto i : TSeqI(10)) { - cout << "Element " << i << endl; - } - - cout << "Loop on sequence of integers from 3 to 29 in steps of 6" << endl; - for (auto i : TSeqI(-5, 29, 6)) { - cout << "Element " << i << endl; - } - - cout << "Loop backwards on sequence of integers from 50 to 30 in steps of 3" << endl; - for (auto i : TSeqI(50, 30, -3)) { - cout << "Element " << i << endl; - } - - cout << "stl algorithm, for_each" << endl; - TSeqUL ulSeq(2,30,3); - std::for_each(std::begin(ulSeq),std::end(ulSeq),[](ULong_t i){cout << "For each: " << i <SetSeed(4357); - TFoam *FoamX = new TFoam("FoamX"); // Create Simulator - FoamX->SetkDim(2); // No. of dimensions, obligatory! - FoamX->SetnCells(500); // Optionally No. of cells, default=2000 - FoamX->SetRhoInt(Camel2); // Set 2-dim distribution, included below - FoamX->SetPseRan(PseRan); // Set random number generator - FoamX->Initialize(); // Initialize simulator, may take time... - // - // visualising generated distribution - TCanvas *cKanwa = new TCanvas("cKanwa","Canvas for plotting",600,600); - cKanwa->cd(); - // From now on FoamX is ready to generate events - int nshow=5000; - for(long loop=0; loop<100000; loop++){ - FoamX->MakeEvent(); // generate MC event - FoamX->GetMCvect( MCvect); // get generated vector (x,y) - Double_t x=MCvect[0]; - Double_t y=MCvect[1]; - if(loop<10) cout<<"(x,y) = ( "<< x <<", "<< y <<" )"<Fill(x,y); - // live plot - if(loop == nshow){ - nshow += 5000; - hst_xy->Draw("lego2"); - cKanwa->Update(); - } - }// loop - // - hst_xy->Draw("lego2"); // final plot - cKanwa->Update(); - // - Double_t MCresult, MCerror; - FoamX->GetIntegMC( MCresult, MCerror); // get MC integral, should be one - cout << " MCresult= " << MCresult << " +- " << MCerror <SetSeed(4357); + TFoam *FoamX = new TFoam("FoamX"); // Create Simulator + FoamX->SetkDim(2); // No. 
of dimensions, obligatory! + FoamX->SetnCells(500); // Optionally No. of cells, default=2000 + FoamX->SetRhoInt(Camel2); // Set 2-dim distribution, included below + FoamX->SetPseRan(PseRan); // Set random number generator + FoamX->Initialize(); // Initialize simulator, may take time... + // + // visualising generated distribution + TCanvas *cKanwa = new TCanvas("cKanwa","Canvas for plotting",600,600); + cKanwa->cd(); + // From now on FoamX is ready to generate events + int nshow=5000; + for(long loop=0; loop<100000; loop++){ + FoamX->MakeEvent(); // generate MC event + FoamX->GetMCvect( MCvect); // get generated vector (x,y) + Double_t x=MCvect[0]; + Double_t y=MCvect[1]; + if(loop<10) cout<<"(x,y) = ( "<< x <<", "<< y <<" )"<Fill(x,y); + // live plot + if(loop == nshow){ + nshow += 5000; + hst_xy->Draw("lego2"); + cKanwa->Update(); + } + }// loop + // + hst_xy->Draw("lego2"); // final plot + cKanwa->Update(); + // + Double_t MCresult, MCerror; + FoamX->GetIntegMC( MCresult, MCerror); // get MC integral, should be one + cout << " MCresult= " << MCresult << " +- " << MCerror < 10) { - std::cout << "2 <= msize <= 10" < 6 x 6 but for smaller sizes, the -// inversion is performed according to Cramer's rule by explicitly calculating -// all Jacobi's sub-determinants . For instance for a 6 x 6 matrix this means: -// # of 5 x 5 determinant : 36 -// # of 4 x 4 determinant : 75 -// # of 3 x 3 determinant : 80 -// # of 2 x 2 determinant : 45 (see TMatrixD/FCramerInv.cxx) -// -// The only "quality" control in this process is to check whether the 6 x 6 -// determinant is unequal 0 . But speed gains are significant compared to Invert() , -// up to an order of magnitude for sizes <= 4 x 4 -// -// The inversion is done "in place", so the original matrix will be overwritten -// If a pointer to a Double_t is supplied the determinant is calculated -// - - std::cout << "1. 
Use .InvertFast(&det)" < 6) - std::cout << " for ("< 1 -// - The last step is a standard forward/backward substitution . -// -// It is important to realize that both InvertFast() and Invert() are "one-shot" deals , speed -// comes at a price . If something goes wrong because the matrix is (near) singular, you have -// overwritten your original matrix and no factorization is available anymore to get more -// information like condition number or change the tolerance number . -// -// All other calls in the matrix classes involving inversion like the ones with the "smart" -// constructors (kInverted,kInvMult...) use this inversion method . -// - - std::cout << "2. Use .Invert(&det)" << std::endl; - - Double_t det2; - TMatrixD H2 = H_square; - H2.Invert(&det2); - - TMatrixD U2(H2,TMatrixD::kMult,H_square); - TMatrixDDiag diag2(U2); diag2 = 0.0; - const Double_t U2_max_offdiag = (U2.Abs()).Max(); - std::cout << " Maximum off-diagonal = " << U2_max_offdiag << std::endl; - std::cout << " Determinant = " << det2 << std::endl; - -// 3. Inversion through LU decomposition -// The (default) algorithms used are similar to 2. (Not identical because in 2, the whole -// calculation is done "in-place". Here the original matrix is copied (so more memory -// management => slower) and several operations can be performed without having to repeat -// the decomposition step . -// Inverting a matrix is nothing else than solving a set of equations where the rhs is given -// by the unit matrix, so the steps to take are identical to those solving a linear equation : -// - - std::cout << "3. Use TDecompLU" << std::endl; - - TMatrixD H3 = H_square; - TDecompLU lu(H_square); - - // Any operation that requires a decomposition will trigger it . The class keeps - // an internal state so that following operations will not perform the decomposition again - // unless the matrix is changed through SetMatrix(..) 
- // One might want to proceed more cautiously by invoking first Decompose() and check its - // return value before proceeding.... - - lu.Invert(H3); - Double_t d1_lu; Double_t d2_lu; - lu.Det(d1_lu,d2_lu); - Double_t det3 = d1_lu*TMath::Power(2.,d2_lu); - - TMatrixD U3(H3,TMatrixD::kMult,H_square); - TMatrixDDiag diag3(U3); diag3 = 0.0; - const Double_t U3_max_offdiag = (U3.Abs()).Max(); - std::cout << " Maximum off-diagonal = " << U3_max_offdiag << std::endl; - std::cout << " Determinant = " << det3 << std::endl; - -// 4. Inversion through SVD decomposition -// For SVD and QRH, the (n x m) matrix does only have to fulfill n >=m . In case n > m -// a pseudo-inverse is calculated - std::cout << "4. Use TDecompSVD on non-square matrix" << std::endl; - - TMatrixD H_nsquare = THilbertMatrixD(msize,msize-1); - - TDecompSVD svd(H_nsquare); - - TMatrixD H4 = svd.Invert(); - Double_t d1_svd; Double_t d2_svd; - svd.Det(d1_svd,d2_svd); - Double_t det4 = d1_svd*TMath::Power(2.,d2_svd); - - TMatrixD U4(H4,TMatrixD::kMult,H_nsquare); - TMatrixDDiag diag4(U4); diag4 = 0.0; - const Double_t U4_max_offdiag = (U4.Abs()).Max(); - std::cout << " Maximum off-diagonal = " << U4_max_offdiag << std::endl; - std::cout << " Determinant = " << det4 << std::endl; -} + if (msize < 2 || msize > 10) { + std::cout << "2 <= msize <= 10" < 6 x 6 but for smaller sizes, the + // inversion is performed according to Cramer's rule by explicitly calculating + // all Jacobi's sub-determinants . For instance for a 6 x 6 matrix this means: + // # of 5 x 5 determinant : 36 + // # of 4 x 4 determinant : 75 + // # of 3 x 3 determinant : 80 + // # of 2 x 2 determinant : 45 (see TMatrixD/FCramerInv.cxx) + // + // The only "quality" control in this process is to check whether the 6 x 6 + // determinant is unequal 0 . 
But speed gains are significant compared to Invert() , + // up to an order of magnitude for sizes <= 4 x 4 + // + // The inversion is done "in place", so the original matrix will be overwritten + // If a pointer to a Double_t is supplied the determinant is calculated + // + + std::cout << "1. Use .InvertFast(&det)" < 6) + std::cout << " for ("< 1 + // - The last step is a standard forward/backward substitution . + // + // It is important to realize that both InvertFast() and Invert() are "one-shot" deals , speed + // comes at a price . If something goes wrong because the matrix is (near) singular, you have + // overwritten your original matrix and no factorization is available anymore to get more + // information like condition number or change the tolerance number . + // + // All other calls in the matrix classes involving inversion like the ones with the "smart" + // constructors (kInverted,kInvMult...) use this inversion method . + // + + std::cout << "2. Use .Invert(&det)" << std::endl; + + Double_t det2; + TMatrixD H2 = H_square; + H2.Invert(&det2); + + TMatrixD U2(H2,TMatrixD::kMult,H_square); + TMatrixDDiag diag2(U2); diag2 = 0.0; + const Double_t U2_max_offdiag = (U2.Abs()).Max(); + std::cout << " Maximum off-diagonal = " << U2_max_offdiag << std::endl; + std::cout << " Determinant = " << det2 << std::endl; + + // 3. Inversion through LU decomposition + // The (default) algorithms used are similar to 2. (Not identical because in 2, the whole + // calculation is done "in-place". Here the original matrix is copied (so more memory + // management => slower) and several operations can be performed without having to repeat + // the decomposition step . + // Inverting a matrix is nothing else than solving a set of equations where the rhs is given + // by the unit matrix, so the steps to take are identical to those solving a linear equation : + // + + std::cout << "3. 
Use TDecompLU" << std::endl; + + TMatrixD H3 = H_square; + TDecompLU lu(H_square); + + // Any operation that requires a decomposition will trigger it . The class keeps + // an internal state so that following operations will not perform the decomposition again + // unless the matrix is changed through SetMatrix(..) + // One might want to proceed more cautiously by invoking first Decompose() and check its + // return value before proceeding.... + + lu.Invert(H3); + Double_t d1_lu; Double_t d2_lu; + lu.Det(d1_lu,d2_lu); + Double_t det3 = d1_lu*TMath::Power(2.,d2_lu); + + TMatrixD U3(H3,TMatrixD::kMult,H_square); + TMatrixDDiag diag3(U3); diag3 = 0.0; + const Double_t U3_max_offdiag = (U3.Abs()).Max(); + std::cout << " Maximum off-diagonal = " << U3_max_offdiag << std::endl; + std::cout << " Determinant = " << det3 << std::endl; + + // 4. Inversion through SVD decomposition + // For SVD and QRH, the (n x m) matrix does only have to fulfill n >=m . In case n > m + // a pseudo-inverse is calculated + std::cout << "4. Use TDecompSVD on non-square matrix" << std::endl; + + TMatrixD H_nsquare = THilbertMatrixD(msize,msize-1); + + TDecompSVD svd(H_nsquare); + + TMatrixD H4 = svd.Invert(); + Double_t d1_svd; Double_t d2_svd; + svd.Det(d1_svd,d2_svd); + Double_t det4 = d1_svd*TMath::Power(2.,d2_svd); + + TMatrixD U4(H4,TMatrixD::kMult,H_nsquare); + TMatrixDDiag diag4(U4); diag4 = 0.0; + const Double_t U4_max_offdiag = (U4.Abs()).Max(); + std::cout << " Maximum off-diagonal = " << U4_max_offdiag << std::endl; + std::cout << " Determinant = " << det4 << std::endl; + } diff --git a/tutorials/matrix/solveLinear.C b/tutorials/matrix/solveLinear.C index 45ac8b238ff02..55d19bad602b3 100644 --- a/tutorials/matrix/solveLinear.C +++ b/tutorials/matrix/solveLinear.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_matrix +/// \notebook /// This macro shows several ways to perform a linear least-squares /// analysis . 
To keep things simple we fit a straight line to 4 /// data points diff --git a/tutorials/multicore/mp001_fillHistos.C b/tutorials/multicore/mp001_fillHistos.C index 8c243cc24d425..9b4b727d0647e 100644 --- a/tutorials/multicore/mp001_fillHistos.C +++ b/tutorials/multicore/mp001_fillHistos.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_multicore +/// \notebook -nodraw /// Fill histograms in parallel and write them on file. /// This example expresses the parallelism of the mt001_fillHistos.C tutorial /// with multiprocessing techniques. diff --git a/tutorials/multicore/mp101_fillNtuples.C b/tutorials/multicore/mp101_fillNtuples.C index 14d33c50770ab..d26d35c11e4bd 100644 --- a/tutorials/multicore/mp101_fillNtuples.C +++ b/tutorials/multicore/mp101_fillNtuples.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_multicore +/// \notebook -nodraw /// Fill n-tuples in distinct workers. /// This tutorial illustrates the basics of how it's possible with ROOT to /// offload heavy operations on multiple processes and how it's possible to write diff --git a/tutorials/multicore/mp102_readNtuplesFillHistosAndFit.C b/tutorials/multicore/mp102_readNtuplesFillHistosAndFit.C index d6dc9ad0cae67..2c612e526db55 100644 --- a/tutorials/multicore/mp102_readNtuplesFillHistosAndFit.C +++ b/tutorials/multicore/mp102_readNtuplesFillHistosAndFit.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_multicore +/// \notebook -js /// Read n-tuples in distinct workers, fill histograms, merge them and fit. /// We express parallelism with multiprocessing as it is done with multithreading /// in mt102_readNtuplesFillHistosAndFit. 
diff --git a/tutorials/multicore/mp201_parallelHistoFill.C b/tutorials/multicore/mp201_parallelHistoFill.C index 3cef48105d133..091275d360bf7 100644 --- a/tutorials/multicore/mp201_parallelHistoFill.C +++ b/tutorials/multicore/mp201_parallelHistoFill.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_multicore +/// \notebook -js /// Parallel fill of a histogram /// This tutorial shows how a histogram can be filled in parallel /// with a multiprocess approach. diff --git a/tutorials/physics/PhaseSpace.C b/tutorials/physics/PhaseSpace.C index 3b3749f0aedfd..1da0dabf8fd43 100644 --- a/tutorials/physics/PhaseSpace.C +++ b/tutorials/physics/PhaseSpace.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_physics +/// \notebook -js /// Example of use of TGenPhaseSpace /// /// \macro_image diff --git a/tutorials/quadp/portfolio.C b/tutorials/quadp/portfolio.C index 71f65d091f1fc..b31af3871e020 100644 --- a/tutorials/quadp/portfolio.C +++ b/tutorials/quadp/portfolio.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_quadp +/// \notebook /// This macro shows in detail the use of the quadratic programming package quadp . 
/// Running this macro : /// @@ -291,98 +292,98 @@ TVectorD OptimalInvest(Double_t riskFactor,TVectorD r,TMatrixDSym Covar) //--------------------------------------------------------------------------- void portfolio() { - const Int_t sDay = 20000809; - const Int_t eDay = 20040602; - - const char *fname = "stock.root"; - TFile *f = 0; - if (!gSystem->AccessPathName(fname)) { - f = TFile::Open(fname); - } else { - printf("accessing %s file from http://root.cern.ch/files\n",fname); - f = TFile::Open(Form("http://root.cern.ch/files/%s",fname)); - } - if (!f) return; - - TArrayF *data = new TArrayF[nrStocks]; - for (Int_t i = 0; i < nrStocks; i++) { - const TString symbol = stocks[i]; - data[i] = StockReturn(f,symbol,sDay,eDay); - } - - const Int_t nrData = data[0].GetSize(); - - TVectorD r(nrStocks); - for (Int_t i = 0; i < nrStocks; i++) - r[i] = data[i].GetSum()/nrData; - - TMatrixDSym Covar(nrStocks); - for (Int_t i = 0; i < nrStocks; i++) { - for (Int_t j = 0; j <= i; j++) { - Double_t sum = 0.; - for (Int_t k = 0; k < nrData; k++) - sum += (data[i][k]-r[i])*(data[j][k]-r[j]); - Covar(i,j) = Covar(j,i) = sum/nrData; - } - } - - const TVectorD weight1 = OptimalInvest(2.0,r,Covar); - const TVectorD weight2 = OptimalInvest(10.,r,Covar); - - cout << "stock daily daily w1 w2" <Divide(1,2); - - // utility function / risk profile - - c1->cd(1); - gPad->SetGridx(); - gPad->SetGridy(); - - TF1 *f1 = new TF1("f1",RiskProfile,0,2.5,1); - f1->SetParameter(0,2.0); - f1->SetLineColor(49); - f1->Draw("AC"); - f1->GetHistogram()->SetXTitle("dollar"); - f1->GetHistogram()->SetYTitle("utility"); - f1->GetHistogram()->SetMinimum(0.0); - f1->GetHistogram()->SetMaximum(1.0); - TF1 *f2 = new TF1("f2",RiskProfile,0,2.5,1); - f2->SetParameter(0,10.); - f2->SetLineColor(50); - f2->Draw("CSAME"); - - TLegend *legend1 = new TLegend(0.50,0.65,0.70,0.82); - legend1->AddEntry(f1,"1-exp(-2.0*x)","l"); - legend1->AddEntry(f2,"1-exp(-10.*x)","l"); - legend1->Draw(); - - // vertical bar chart of 
portfolio distribution - - c1->cd(2); - TH1F *h1 = new TH1F("h1","Portfolio Distribution",nrStocks,0,0); - TH1F *h2 = new TH1F("h2","Portfolio Distribution",nrStocks,0,0); - h1->SetStats(0); - h1->SetFillColor(49); - h2->SetFillColor(50); - h1->SetBarWidth(0.45); - h1->SetBarOffset(0.1); - h2->SetBarWidth(0.4); - h2->SetBarOffset(0.55); - for (Int_t i = 0; i < nrStocks; i++) { - h1->Fill(stocks[i],weight1[i]); - h2->Fill(stocks[i],weight2[i]); - } - - h1->Draw("BAR2 HIST"); - h2->Draw("BAR2SAME HIST"); - - TLegend *legend2 = new TLegend(0.50,0.65,0.70,0.82); - legend2->AddEntry(h1,"high risk","f"); - legend2->AddEntry(h2,"low risk","f"); - legend2->Draw(); + const Int_t sDay = 20000809; + const Int_t eDay = 20040602; + + const char *fname = "stock.root"; + TFile *f = 0; + if (!gSystem->AccessPathName(fname)) { + f = TFile::Open(fname); + } else { + printf("accessing %s file from http://root.cern.ch/files\n",fname); + f = TFile::Open(Form("http://root.cern.ch/files/%s",fname)); + } + if (!f) return; + + TArrayF *data = new TArrayF[nrStocks]; + for (Int_t i = 0; i < nrStocks; i++) { + const TString symbol = stocks[i]; + data[i] = StockReturn(f,symbol,sDay,eDay); + } + + const Int_t nrData = data[0].GetSize(); + + TVectorD r(nrStocks); + for (Int_t i = 0; i < nrStocks; i++) + r[i] = data[i].GetSum()/nrData; + + TMatrixDSym Covar(nrStocks); + for (Int_t i = 0; i < nrStocks; i++) { + for (Int_t j = 0; j <= i; j++) { + Double_t sum = 0.; + for (Int_t k = 0; k < nrData; k++) + sum += (data[i][k]-r[i])*(data[j][k]-r[j]); + Covar(i,j) = Covar(j,i) = sum/nrData; + } + } + + const TVectorD weight1 = OptimalInvest(2.0,r,Covar); + const TVectorD weight2 = OptimalInvest(10.,r,Covar); + + cout << "stock daily daily w1 w2" <Divide(1,2); + + // utility function / risk profile + + c1->cd(1); + gPad->SetGridx(); + gPad->SetGridy(); + + TF1 *f1 = new TF1("f1",RiskProfile,0,2.5,1); + f1->SetParameter(0,2.0); + f1->SetLineColor(49); + f1->Draw("AC"); + 
f1->GetHistogram()->SetXTitle("dollar"); + f1->GetHistogram()->SetYTitle("utility"); + f1->GetHistogram()->SetMinimum(0.0); + f1->GetHistogram()->SetMaximum(1.0); + TF1 *f2 = new TF1("f2",RiskProfile,0,2.5,1); + f2->SetParameter(0,10.); + f2->SetLineColor(50); + f2->Draw("CSAME"); + + TLegend *legend1 = new TLegend(0.50,0.65,0.70,0.82); + legend1->AddEntry(f1,"1-exp(-2.0*x)","l"); + legend1->AddEntry(f2,"1-exp(-10.*x)","l"); + legend1->Draw(); + + // vertical bar chart of portfolio distribution + + c1->cd(2); + TH1F *h1 = new TH1F("h1","Portfolio Distribution",nrStocks,0,0); + TH1F *h2 = new TH1F("h2","Portfolio Distribution",nrStocks,0,0); + h1->SetStats(0); + h1->SetFillColor(49); + h2->SetFillColor(50); + h1->SetBarWidth(0.45); + h1->SetBarOffset(0.1); + h2->SetBarWidth(0.4); + h2->SetBarOffset(0.55); + for (Int_t i = 0; i < nrStocks; i++) { + h1->Fill(stocks[i],weight1[i]); + h2->Fill(stocks[i],weight2[i]); + } + + h1->Draw("BAR2 HIST"); + h2->Draw("BAR2SAME HIST"); + + TLegend *legend2 = new TLegend(0.50,0.65,0.70,0.82); + legend2->AddEntry(h1,"high risk","f"); + legend2->AddEntry(h2,"low risk","f"); + legend2->Draw(); } diff --git a/tutorials/roofit/rf102_dataimport.C b/tutorials/roofit/rf102_dataimport.C index 2d5f253cfca95..17315dd8a677b 100644 --- a/tutorials/roofit/rf102_dataimport.C +++ b/tutorials/roofit/rf102_dataimport.C @@ -128,7 +128,7 @@ void rf102_dataimport() TH1* makeTH1() - { +{ // Create ROOT TH1 filled with a Gaussian distribution TH1D* hh = new TH1D("hh","hh",25,-10,10) ; diff --git a/tutorials/roofit/rf104_classfactory.C b/tutorials/roofit/rf104_classfactory.C index 3d80524e25c3e..be5f53f889368 100644 --- a/tutorials/roofit/rf104_classfactory.C +++ b/tutorials/roofit/rf104_classfactory.C @@ -1,6 +1,6 @@ /// \file /// \ingroup tutorial_roofit -/// \notebook +/// \notebook -js /// 'BASIC FUNCTIONALITY' RooFit tutorial macro #104 /// /// The class factory for functions and p.d.f.s diff --git a/tutorials/roofit/rf512_wsfactory_oper.C 
b/tutorials/roofit/rf512_wsfactory_oper.C index c164cca5c787c..98f0eeae1328b 100644 --- a/tutorials/roofit/rf512_wsfactory_oper.C +++ b/tutorials/roofit/rf512_wsfactory_oper.C @@ -1,12 +1,11 @@ /// \file /// \ingroup tutorial_roofit -/// \notebook +/// \notebook -nodraw /// 'ORGANIZATION AND SIMULTANEOUS FITS' RooFit tutorial macro #512 /// /// Illustration of operator expressions and expression-based /// basic p.d.f.s in the workspace factory syntax /// -/// \macro_image /// \macro_output /// \macro_code /// \author 04/2009 - Wouter Verkerke diff --git a/tutorials/roostats/FourBinInstructional.C b/tutorials/roostats/FourBinInstructional.C index 04c41c9d89415..1e0b9d961d07d 100644 --- a/tutorials/roostats/FourBinInstructional.C +++ b/tutorials/roostats/FourBinInstructional.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook /// This example is a generalization of the on/off problem. /// /// This example is a generalization of the on/off problem. diff --git a/tutorials/roostats/HybridInstructional.C b/tutorials/roostats/HybridInstructional.C index a55881c586bb4..a166a9f8d2aa0 100644 --- a/tutorials/roostats/HybridInstructional.C +++ b/tutorials/roostats/HybridInstructional.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// Example demonstrating usage of HybridCalcultor /// /// A hypothesis testing example based on number counting @@ -116,502 +117,502 @@ ClassImp(BinCountTestStat) void HybridInstructional() { - // This tutorial has 6 parts - // Table of Contents - // Setup - // 1. Make the model for the 'prototype problem' - // Special cases - // 2. Use RooFit's direct integration to get p-value & significance - // 3. Use RooStats analytic solution for this problem - // RooStats HybridCalculator -- can be generalized - // 4. RooStats ToyMC version of 2. & 3. - // 5. RooStats ToyMC with an equivalent test statistic - // 6. 
RooStats ToyMC with simultaneous control & main measurement - - // It takes ~4 min without PROOF and ~2 min with PROOF on 4 cores. - // Of course, everything looks nicer with more toys, which takes longer. - -#ifdef __CINT__ - cout << "DO NOT RUN WITH CINT: we are using a custom test statistic "; - cout << "which requires that this tutorial must be compiled "; - cout << "with ACLIC" << endl; - return; -#endif - - - TStopwatch t; - t.Start(); - TCanvas *c = new TCanvas; - c->Divide(2,2); - - /////////////////////////////////////////////////////// - // P A R T 1 : D I R E C T I N T E G R A T I O N - ////////////////////////////////////////////////////// - // Make model for prototype on/off problem - // Pois(x | s+b) * Pois(y | tau b ) - // for Z_Gamma, use uniform prior on b. - RooWorkspace* w = new RooWorkspace("w"); - w->factory("Poisson::px(x[150,0,500],sum::splusb(s[0,0,100],b[100,0,300]))"); - w->factory("Poisson::py(y[100,0,500],prod::taub(tau[1.],b))"); - w->factory("PROD::model(px,py)"); - w->factory("Uniform::prior_b(b)"); - - // We will control the output level in a few places to avoid - // verbose progress messages. We start by keeping track - // of the current threshold on messages. - RooFit::MsgLevel msglevel = RooMsgService::instance().globalKillBelow(); - - // Use PROOF-lite on multi-core machines - ProofConfig* pc = NULL; - // uncomment below if you want to use PROOF - // pc = new ProofConfig(*w, 4, "workers=4", kFALSE); // machine with 4 cores - // pc = new ProofConfig(*w, 2, "workers=2", kFALSE); // machine with 2 cores - - /////////////////////////////////////////////////////// - // P A R T 2 : D I R E C T I N T E G R A T I O N - ////////////////////////////////////////////////////// - // This is not the 'RooStats' way, but in this case the distribution - // of the test statistic is simply x and can be calculated directly - // from the PDF using RooFit's built-in integration. 
- // Note, this does not generalize to situations in which the test statistic - // depends on many events (rows in a dataset). - - // construct the Bayesian-averaged model (eg. a projection pdf) - // p'(x|s) = \int db p(x|s+b) * [ p(y|b) * prior(b) ] - w->factory("PROJ::averagedModel(PROD::foo(px|b,py,prior_b),b)") ; - - RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); // lower message level - // plot it, red is averaged model, green is b known exactly, blue is s+b av model - RooPlot* frame = w->var("x")->frame(Range(50,230)) ; - w->pdf("averagedModel")->plotOn(frame,LineColor(kRed)) ; - w->pdf("px")->plotOn(frame,LineColor(kGreen)) ; - w->var("s")->setVal(50.); - w->pdf("averagedModel")->plotOn(frame,LineColor(kBlue)) ; - c->cd(1); - frame->Draw() ; - w->var("s")->setVal(0.); - - // compare analytic calculation of Z_Bi - // with the numerical RooFit implementation of Z_Gamma - // for an example with x = 150, y = 100 - - // numeric RooFit Z_Gamma - w->var("y")->setVal(100); - w->var("x")->setVal(150); - RooAbsReal* cdf = w->pdf("averagedModel")->createCdf(*w->var("x")); - cdf->getVal(); // get ugly print messages out of the way - cout << "-----------------------------------------"<defineSet("obs","x"); - w->defineSet("poi","s"); - - // create a toy dataset with the x=150 - RooDataSet *data = new RooDataSet("d", "d", *w->set("obs")); - data->add(*w->set("obs")); - - ////////////////////////////////////////////////////////// - // Part 3a : Setup ModelConfigs - // create the null (background-only) ModelConfig with s=0 - ModelConfig b_model("B_model", w); - b_model.SetPdf(*w->pdf("px")); - b_model.SetObservables(*w->set("obs")); - b_model.SetParametersOfInterest(*w->set("poi")); - w->var("s")->setVal(0.0); // important! 
- b_model.SetSnapshot(*w->set("poi")); - - // create the alternate (signal+background) ModelConfig with s=50 - ModelConfig sb_model("S+B_model", w); - sb_model.SetPdf(*w->pdf("px")); - sb_model.SetObservables(*w->set("obs")); - sb_model.SetParametersOfInterest(*w->set("poi")); - w->var("s")->setVal(50.0); // important! - sb_model.SetSnapshot(*w->set("poi")); - - - ////////////////////////////////////////////////////////// - // Part 3b : Choose Test Statistic - // To make an equivalent calculation we need to use x as the test - // statistic. This is not a built-in test statistic in RooStats - // so we define it above. The new class inherits from the - // RooStats::TestStatistic interface, and simply returns the value - // of x in the dataset. - - BinCountTestStat binCount("x"); - - ////////////////////////////////////////////////////////// - // Part 3c : Define Prior used to randomize nuisance parameters - // - // The prior used for the hybrid calculator is the posterior - // from the auxiliary measurement y. The model for the aux. - // measurement is Pois(y|tau*b), thus the likleihood function - // is proportional to (has the form of) a Gamma distribution. - // if the 'original prior' \eta(b) is uniform, then from - // Bayes's theorem we have the posterior: - // \pi(b) = Pois(y|tau*b) * \eta(b) - // If \eta(b) is flat, then we arrive at a Gamma distribution. - // Since RooFit will normalize the PDF we can actually supply - // py=Pois(y,tau*b) that will be equivalent to multiplying by a uniform. - // - // Alternatively, we could explicitly use a gamma distribution: - // w->factory("Gamma::gamma(b,sum::temp(y,1),1,0)"); - // - // or we can use some other ad hoc prior that do not naturally - // follow from the known form of the auxiliary measurement. - // The common choice is the equivalent Gaussian: - w->factory("Gaussian::gauss_prior(b,y, expr::sqrty('sqrt(y)',y))"); - // this corresponds to the "Z_N" calculation. 
- // - // or one could use the analogous log-normal prior - w->factory("Lognormal::lognorm_prior(b,y, expr::kappa('1+1./sqrt(y)',y))"); - // - // Ideally, the HybridCalculator would be able to inspect the full - // model Pois(x | s+b) * Pois(y | tau b ) and be given the original - // prior \eta(b) to form \pi(b) = Pois(y|tau*b) * \eta(b). - // This is not yet implemented because in the general case - // it is not easy to identify the terms in the PDF that correspond - // to the auxiliary measurement. So for now, it must be set - // explicitly with: - // - ForcePriorNuisanceNull() - // - ForcePriorNuisanceAlt() - // the name "ForcePriorNuisance" was chosen because we anticipate - // this to be auto-detected, but will leave the option open - // to force to a different prior for the nuisance parameters. - - ////////////////////////////////////////////////////////// - // Part 3d : Construct and configure the HybridCalculator - - HybridCalculator hc1(*data, sb_model, b_model); - ToyMCSampler *toymcs1 = (ToyMCSampler*)hc1.GetTestStatSampler(); - toymcs1->SetNEventsPerToy(1); // because the model is in number counting form - toymcs1->SetTestStatistic(&binCount); // set the test statistic - hc1.SetToys(20000,1000); - hc1.ForcePriorNuisanceAlt(*w->pdf("py")); - hc1.ForcePriorNuisanceNull(*w->pdf("py")); - // if you wanted to use the ad hoc Gaussian prior instead - // hc1.ForcePriorNuisanceAlt(*w->pdf("gauss_prior")); - // hc1.ForcePriorNuisanceNull(*w->pdf("gauss_prior")); - // if you wanted to use the ad hoc log-normal prior instead - // hc1.ForcePriorNuisanceAlt(*w->pdf("lognorm_prior")); - // hc1.ForcePriorNuisanceNull(*w->pdf("lognorm_prior")); - - // enable proof - // NOTE: This test statistic is defined in this macro, and is not - // working with PROOF currently. Luckily test stat is fast to evaluate. 
- // if(pc) toymcs1->SetProofConfig(pc); - - // these lines save current msg level and then kill any messages below ERROR - RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); - // Get the result - HypoTestResult *r1 = hc1.GetHypoTest(); - RooMsgService::instance().setGlobalKillBelow(msglevel); // set it back - cout << "-----------------------------------------"<Print(); - t.Stop(); t.Print(); t.Reset(); t.Start(); - - c->cd(2); - HypoTestPlot *p1 = new HypoTestPlot(*r1,30); // 30 bins, TS is discrete - p1->Draw(); - - //////////////////////////////////////////////////////////////////////////// - // P A R T 5 : U S I N G H Y B R I D C A L C U L A T O R W I T H - // A N A L T E R N A T I V E T E S T S T A T I S T I C - ///////////////////////////////////////////////////////////////////////////// - // - // A likelihood ratio test statistics should be 1-to-1 with the count x - // when the value of b is fixed in the likelihood. This is implemented - // by the SimpleLikelihoodRatioTestStat - - SimpleLikelihoodRatioTestStat slrts(*b_model.GetPdf(),*sb_model.GetPdf()); - slrts.SetNullParameters(*b_model.GetSnapshot()); - slrts.SetAltParameters(*sb_model.GetSnapshot()); - - // HYBRID CALCULATOR - HybridCalculator hc2(*data, sb_model, b_model); - ToyMCSampler *toymcs2 = (ToyMCSampler*)hc2.GetTestStatSampler(); - toymcs2->SetNEventsPerToy(1); - toymcs2->SetTestStatistic(&slrts); - hc2.SetToys(20000,1000); - hc2.ForcePriorNuisanceAlt(*w->pdf("py")); - hc2.ForcePriorNuisanceNull(*w->pdf("py")); - // if you wanted to use the ad hoc Gaussian prior instead - // hc2.ForcePriorNuisanceAlt(*w->pdf("gauss_prior")); - // hc2.ForcePriorNuisanceNull(*w->pdf("gauss_prior")); - // if you wanted to use the ad hoc log-normal prior instead - // hc2.ForcePriorNuisanceAlt(*w->pdf("lognorm_prior")); - // hc2.ForcePriorNuisanceNull(*w->pdf("lognorm_prior")); - - // enable proof - if(pc) toymcs2->SetProofConfig(pc); - - // these lines save current msg level and then kill any messages 
below ERROR - RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); - // Get the result - HypoTestResult *r2 = hc2.GetHypoTest(); - cout << "-----------------------------------------"<Print(); - t.Stop(); t.Print(); t.Reset(); t.Start(); - RooMsgService::instance().setGlobalKillBelow(msglevel); - - c->cd(3); - HypoTestPlot *p2 = new HypoTestPlot(*r2,30); // 30 bins - p2->Draw(); - - //////////////////////////////////////////////////////////////////////////// - // P A R T 6 : U S I N G H Y B R I D C A L C U L A T O R W I T H - // A N A L T E R N A T I V E T E S T S T A T I S T I C - // A N D S I M U L T A N E O U S M O D E L - ///////////////////////////////////////////////////////////////////////////// - // - // If one wants to use a test statistic in which the nuisance parameters - // are profiled (in one way or another), then the PDF must constrain b. - // Otherwise any observation x can always be explained with s=0 and b=x/tau. - // - // In this case, one is really thinking about the problem in a - // different way. They are considering x,y simultaneously. - // and the PDF should be Pois(x | s+b) * Pois(y | tau b ) - // and the set 'obs' should be {x,y}. 
- - w->defineSet("obsXY","x,y"); - - // create a toy dataset with the x=150, y=100 - w->var("x")->setVal(150.); - w->var("y")->setVal(100.); - RooDataSet *dataXY = new RooDataSet("dXY", "dXY", *w->set("obsXY")); - dataXY->add(*w->set("obsXY")); - - // now we need new model configs, with PDF="model" - ModelConfig b_modelXY("B_modelXY", w); - b_modelXY.SetPdf(*w->pdf("model")); // IMPORTANT - b_modelXY.SetObservables(*w->set("obsXY")); - b_modelXY.SetParametersOfInterest(*w->set("poi")); - w->var("s")->setVal(0.0); // IMPORTANT - b_modelXY.SetSnapshot(*w->set("poi")); - - // create the alternate (signal+background) ModelConfig with s=50 - ModelConfig sb_modelXY("S+B_modelXY", w); - sb_modelXY.SetPdf(*w->pdf("model")); // IMPORTANT - sb_modelXY.SetObservables(*w->set("obsXY")); - sb_modelXY.SetParametersOfInterest(*w->set("poi")); - w->var("s")->setVal(50.0); // IMPORTANT - sb_modelXY.SetSnapshot(*w->set("poi")); - - // without this print, their can be a crash when using PROOF. Strange. - // w->Print(); - - // Test statistics like the profile likelihood ratio - // (or the ratio of profiled likelihoods (Tevatron) or the MLE for s) - // will now work, since the nuisance parameter b is constrained by y. - // ratio of alt and null likelihoods with background yield profiled. 
- // - // NOTE: These are slower because they have to run fits for each toy - - // Tevatron-style Ratio of profiled likelihoods - // Q_Tev = -log L(s=0,\hat\hat{b})/L(s=50,\hat\hat{b}) - RatioOfProfiledLikelihoodsTestStat - ropl(*b_modelXY.GetPdf(), *sb_modelXY.GetPdf(), sb_modelXY.GetSnapshot()); - ropl.SetSubtractMLE(false); - - // profile likelihood where alternate is best fit value of signal yield - // \lambda(0) = -log L(s=0,\hat\hat{b})/L(\hat{s},\hat{b}) - ProfileLikelihoodTestStat profll(*b_modelXY.GetPdf()); - - // just use the maximum likelihood estimate of signal yield - // MLE = \hat{s} - MaxLikelihoodEstimateTestStat mlets(*sb_modelXY.GetPdf(), *w->var("s")); - - // However, it is less clear how to justify the prior used in randomizing - // the nuisance parameters (since that is a property of the ensemble, - // and y is a property of each toy pseudo experiment. In that case, - // one probably wants to consider a different y0 which will be held - // constant and the prior \pi(b) = Pois(y0 | tau b) * \eta(b). 
- w->factory("y0[100]"); - w->factory("Gamma::gamma_y0(b,sum::temp0(y0,1),1,0)"); - w->factory("Gaussian::gauss_prior_y0(b,y0, expr::sqrty0('sqrt(y0)',y0))"); - - - // HYBRID CALCULATOR - HybridCalculator hc3(*dataXY, sb_modelXY, b_modelXY); - ToyMCSampler *toymcs3 = (ToyMCSampler*)hc3.GetTestStatSampler(); - toymcs3->SetNEventsPerToy(1); - toymcs3->SetTestStatistic(&slrts); - hc3.SetToys(30000,1000); - hc3.ForcePriorNuisanceAlt(*w->pdf("gamma_y0")); - hc3.ForcePriorNuisanceNull(*w->pdf("gamma_y0")); - // if you wanted to use the ad hoc Gaussian prior instead - // hc3.ForcePriorNuisanceAlt(*w->pdf("gauss_prior_y0")); - // hc3.ForcePriorNuisanceNull(*w->pdf("gauss_prior_y0")); - - // choose fit-based test statistic - toymcs3->SetTestStatistic(&profll); - //toymcs3->SetTestStatistic(&ropl); - //toymcs3->SetTestStatistic(&mlets); - - // enable proof - if(pc) toymcs3->SetProofConfig(pc); - - // these lines save current msg level and then kill any messages below ERROR - RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); - // Get the result - HypoTestResult *r3 = hc3.GetHypoTest(); - cout << "-----------------------------------------"<Print(); - t.Stop(); t.Print(); t.Reset(); t.Start(); - RooMsgService::instance().setGlobalKillBelow(msglevel); - - c->cd(4); - c->GetPad(4)->SetLogy(); - HypoTestPlot *p3 = new HypoTestPlot(*r3,50); // 50 bins - p3->Draw(); - - c->SaveAs("zbi.pdf"); - - - /////////////////////////////////////////////////////////// - // OUTPUT W/O PROOF (2.66 GHz Intel Core i7) - /////////////////////////////////////////////////////////// - - /* ------------------------------------------ -Part 2 -Hybrid p-value from direct integration = 0.00094165 -Z_Gamma Significance = 3.10804 ------------------------------------------ -Part 3 -Z_Bi p-value (analytic): 0.00094165 -Z_Bi significance (analytic): 3.10804 -Real time 0:00:00, CP time 0.610 - ------------------------------------------ -Part 4 -Results HybridCalculator_result: - - Null p-value = 
0.00115 +/- 0.000228984 - - Significance = 3.04848 sigma - - Number of S+B toys: 1000 - - Number of B toys: 20000 - - Test statistic evaluated on data: 150 - - CL_b: 0.99885 +/- 0.000239654 - - CL_s+b: 0.476 +/- 0.0157932 - - CL_s: 0.476548 +/- 0.0158118 -Real time 0:00:07, CP time 7.620 - ------------------------------------------ -Part 5 -Results HybridCalculator_result: - - Null p-value = 0.0009 +/- 0.000206057 - - Significance = 3.12139 sigma - - Number of S+B toys: 1000 - - Number of B toys: 20000 - - Test statistic evaluated on data: 10.8198 - - CL_b: 0.9991 +/- 0.000212037 - - CL_s+b: 0.465 +/- 0.0157726 - - CL_s: 0.465419 +/- 0.0157871 -Real time 0:00:34, CP time 34.360 - ------------------------------------------ -Part 6 -Results HybridCalculator_result: - - Null p-value = 0.000666667 +/- 0.000149021 - - Significance = 3.20871 sigma - - Number of S+B toys: 1000 - - Number of B toys: 30000 - - Test statistic evaluated on data: 5.03388 - - CL_b: 0.999333 +/- 0.000149021 - - CL_s+b: 0.511 +/- 0.0158076 - - CL_s: 0.511341 +/- 0.0158183 -Real time 0:05:06, CP time 306.330 - - */ - - - - /////////////////////////////////////////////////////////// - // OUTPUT w/ PROOF (2.66 GHz Intel Core i7, 4 virtual cores) - /////////////////////////////////////////////////////////// - /* ------------------------------------------ -Part 5 -Results HybridCalculator_result: - - Null p-value = 0.00075 +/- 0.000173124 - - Significance = 3.17468 sigma - - Number of S+B toys: 1000 - - Number of B toys: 20000 - - Test statistic evaluated on data: 10.8198 - - CL_b: 0.99925 +/- 0.000193577 - - CL_s+b: 0.454 +/- 0.0157443 - - CL_s: 0.454341 +/- 0.0157564 -Real time 0:00:16, CP time 0.990 - ------------------------------------------ -Part 6 -Results HybridCalculator_result: - - Null p-value = 0.0007 +/- 0.000152699 - - Significance = 3.19465 sigma - - Number of S+B toys: 1000 - - Number of B toys: 30000 - - Test statistic evaluated on data: 5.03388 - - CL_b: 0.9993 +/- 0.000152699 - - 
CL_s+b: 0.518 +/- 0.0158011 - - CL_s: 0.518363 +/- 0.0158124 -Real time 0:01:25, CP time 0.580 + // This tutorial has 6 parts + // Table of Contents + // Setup + // 1. Make the model for the 'prototype problem' + // Special cases + // 2. Use RooFit's direct integration to get p-value & significance + // 3. Use RooStats analytic solution for this problem + // RooStats HybridCalculator -- can be generalized + // 4. RooStats ToyMC version of 2. & 3. + // 5. RooStats ToyMC with an equivalent test statistic + // 6. RooStats ToyMC with simultaneous control & main measurement + + // It takes ~4 min without PROOF and ~2 min with PROOF on 4 cores. + // Of course, everything looks nicer with more toys, which takes longer. + + #ifdef __CINT__ + cout << "DO NOT RUN WITH CINT: we are using a custom test statistic "; + cout << "which requires that this tutorial must be compiled "; + cout << "with ACLIC" << endl; + return; + #endif + + + TStopwatch t; + t.Start(); + TCanvas *c = new TCanvas; + c->Divide(2,2); + + /////////////////////////////////////////////////////// + // P A R T 1 : D I R E C T I N T E G R A T I O N + ////////////////////////////////////////////////////// + // Make model for prototype on/off problem + // Pois(x | s+b) * Pois(y | tau b ) + // for Z_Gamma, use uniform prior on b. + RooWorkspace* w = new RooWorkspace("w"); + w->factory("Poisson::px(x[150,0,500],sum::splusb(s[0,0,100],b[100,0,300]))"); + w->factory("Poisson::py(y[100,0,500],prod::taub(tau[1.],b))"); + w->factory("PROD::model(px,py)"); + w->factory("Uniform::prior_b(b)"); + + // We will control the output level in a few places to avoid + // verbose progress messages. We start by keeping track + // of the current threshold on messages. 
+ RooFit::MsgLevel msglevel = RooMsgService::instance().globalKillBelow(); + + // Use PROOF-lite on multi-core machines + ProofConfig* pc = NULL; + // uncomment below if you want to use PROOF + // pc = new ProofConfig(*w, 4, "workers=4", kFALSE); // machine with 4 cores + // pc = new ProofConfig(*w, 2, "workers=2", kFALSE); // machine with 2 cores + + /////////////////////////////////////////////////////// + // P A R T 2 : D I R E C T I N T E G R A T I O N + ////////////////////////////////////////////////////// + // This is not the 'RooStats' way, but in this case the distribution + // of the test statistic is simply x and can be calculated directly + // from the PDF using RooFit's built-in integration. + // Note, this does not generalize to situations in which the test statistic + // depends on many events (rows in a dataset). + + // construct the Bayesian-averaged model (eg. a projection pdf) + // p'(x|s) = \int db p(x|s+b) * [ p(y|b) * prior(b) ] + w->factory("PROJ::averagedModel(PROD::foo(px|b,py,prior_b),b)") ; + + RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); // lower message level + // plot it, red is averaged model, green is b known exactly, blue is s+b av model + RooPlot* frame = w->var("x")->frame(Range(50,230)) ; + w->pdf("averagedModel")->plotOn(frame,LineColor(kRed)) ; + w->pdf("px")->plotOn(frame,LineColor(kGreen)) ; + w->var("s")->setVal(50.); + w->pdf("averagedModel")->plotOn(frame,LineColor(kBlue)) ; + c->cd(1); + frame->Draw() ; + w->var("s")->setVal(0.); + + // compare analytic calculation of Z_Bi + // with the numerical RooFit implementation of Z_Gamma + // for an example with x = 150, y = 100 + + // numeric RooFit Z_Gamma + w->var("y")->setVal(100); + w->var("x")->setVal(150); + RooAbsReal* cdf = w->pdf("averagedModel")->createCdf(*w->var("x")); + cdf->getVal(); // get ugly print messages out of the way + cout << "-----------------------------------------"<defineSet("obs","x"); + w->defineSet("poi","s"); + + // create a toy 
dataset with the x=150 + RooDataSet *data = new RooDataSet("d", "d", *w->set("obs")); + data->add(*w->set("obs")); + + ////////////////////////////////////////////////////////// + // Part 3a : Setup ModelConfigs + // create the null (background-only) ModelConfig with s=0 + ModelConfig b_model("B_model", w); + b_model.SetPdf(*w->pdf("px")); + b_model.SetObservables(*w->set("obs")); + b_model.SetParametersOfInterest(*w->set("poi")); + w->var("s")->setVal(0.0); // important! + b_model.SetSnapshot(*w->set("poi")); + + // create the alternate (signal+background) ModelConfig with s=50 + ModelConfig sb_model("S+B_model", w); + sb_model.SetPdf(*w->pdf("px")); + sb_model.SetObservables(*w->set("obs")); + sb_model.SetParametersOfInterest(*w->set("poi")); + w->var("s")->setVal(50.0); // important! + sb_model.SetSnapshot(*w->set("poi")); + + + ////////////////////////////////////////////////////////// + // Part 3b : Choose Test Statistic + // To make an equivalent calculation we need to use x as the test + // statistic. This is not a built-in test statistic in RooStats + // so we define it above. The new class inherits from the + // RooStats::TestStatistic interface, and simply returns the value + // of x in the dataset. + + BinCountTestStat binCount("x"); + + ////////////////////////////////////////////////////////// + // Part 3c : Define Prior used to randomize nuisance parameters + // + // The prior used for the hybrid calculator is the posterior + // from the auxiliary measurement y. The model for the aux. + // measurement is Pois(y|tau*b), thus the likleihood function + // is proportional to (has the form of) a Gamma distribution. + // if the 'original prior' \eta(b) is uniform, then from + // Bayes's theorem we have the posterior: + // \pi(b) = Pois(y|tau*b) * \eta(b) + // If \eta(b) is flat, then we arrive at a Gamma distribution. + // Since RooFit will normalize the PDF we can actually supply + // py=Pois(y,tau*b) that will be equivalent to multiplying by a uniform. 
+ // + // Alternatively, we could explicitly use a gamma distribution: + // w->factory("Gamma::gamma(b,sum::temp(y,1),1,0)"); + // + // or we can use some other ad hoc prior that do not naturally + // follow from the known form of the auxiliary measurement. + // The common choice is the equivalent Gaussian: + w->factory("Gaussian::gauss_prior(b,y, expr::sqrty('sqrt(y)',y))"); + // this corresponds to the "Z_N" calculation. + // + // or one could use the analogous log-normal prior + w->factory("Lognormal::lognorm_prior(b,y, expr::kappa('1+1./sqrt(y)',y))"); + // + // Ideally, the HybridCalculator would be able to inspect the full + // model Pois(x | s+b) * Pois(y | tau b ) and be given the original + // prior \eta(b) to form \pi(b) = Pois(y|tau*b) * \eta(b). + // This is not yet implemented because in the general case + // it is not easy to identify the terms in the PDF that correspond + // to the auxiliary measurement. So for now, it must be set + // explicitly with: + // - ForcePriorNuisanceNull() + // - ForcePriorNuisanceAlt() + // the name "ForcePriorNuisance" was chosen because we anticipate + // this to be auto-detected, but will leave the option open + // to force to a different prior for the nuisance parameters. 
+ + ////////////////////////////////////////////////////////// + // Part 3d : Construct and configure the HybridCalculator + + HybridCalculator hc1(*data, sb_model, b_model); + ToyMCSampler *toymcs1 = (ToyMCSampler*)hc1.GetTestStatSampler(); + toymcs1->SetNEventsPerToy(1); // because the model is in number counting form + toymcs1->SetTestStatistic(&binCount); // set the test statistic + hc1.SetToys(20000,1000); + hc1.ForcePriorNuisanceAlt(*w->pdf("py")); + hc1.ForcePriorNuisanceNull(*w->pdf("py")); + // if you wanted to use the ad hoc Gaussian prior instead + // hc1.ForcePriorNuisanceAlt(*w->pdf("gauss_prior")); + // hc1.ForcePriorNuisanceNull(*w->pdf("gauss_prior")); + // if you wanted to use the ad hoc log-normal prior instead + // hc1.ForcePriorNuisanceAlt(*w->pdf("lognorm_prior")); + // hc1.ForcePriorNuisanceNull(*w->pdf("lognorm_prior")); + + // enable proof + // NOTE: This test statistic is defined in this macro, and is not + // working with PROOF currently. Luckily test stat is fast to evaluate. + // if(pc) toymcs1->SetProofConfig(pc); + + // these lines save current msg level and then kill any messages below ERROR + RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); + // Get the result + HypoTestResult *r1 = hc1.GetHypoTest(); + RooMsgService::instance().setGlobalKillBelow(msglevel); // set it back + cout << "-----------------------------------------"<Print(); + t.Stop(); t.Print(); t.Reset(); t.Start(); + + c->cd(2); + HypoTestPlot *p1 = new HypoTestPlot(*r1,30); // 30 bins, TS is discrete + p1->Draw(); + + //////////////////////////////////////////////////////////////////////////// + // P A R T 5 : U S I N G H Y B R I D C A L C U L A T O R W I T H + // A N A L T E R N A T I V E T E S T S T A T I S T I C + ///////////////////////////////////////////////////////////////////////////// + // + // A likelihood ratio test statistics should be 1-to-1 with the count x + // when the value of b is fixed in the likelihood. 
This is implemented + // by the SimpleLikelihoodRatioTestStat + + SimpleLikelihoodRatioTestStat slrts(*b_model.GetPdf(),*sb_model.GetPdf()); + slrts.SetNullParameters(*b_model.GetSnapshot()); + slrts.SetAltParameters(*sb_model.GetSnapshot()); + + // HYBRID CALCULATOR + HybridCalculator hc2(*data, sb_model, b_model); + ToyMCSampler *toymcs2 = (ToyMCSampler*)hc2.GetTestStatSampler(); + toymcs2->SetNEventsPerToy(1); + toymcs2->SetTestStatistic(&slrts); + hc2.SetToys(20000,1000); + hc2.ForcePriorNuisanceAlt(*w->pdf("py")); + hc2.ForcePriorNuisanceNull(*w->pdf("py")); + // if you wanted to use the ad hoc Gaussian prior instead + // hc2.ForcePriorNuisanceAlt(*w->pdf("gauss_prior")); + // hc2.ForcePriorNuisanceNull(*w->pdf("gauss_prior")); + // if you wanted to use the ad hoc log-normal prior instead + // hc2.ForcePriorNuisanceAlt(*w->pdf("lognorm_prior")); + // hc2.ForcePriorNuisanceNull(*w->pdf("lognorm_prior")); + + // enable proof + if(pc) toymcs2->SetProofConfig(pc); + + // these lines save current msg level and then kill any messages below ERROR + RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); + // Get the result + HypoTestResult *r2 = hc2.GetHypoTest(); + cout << "-----------------------------------------"<Print(); + t.Stop(); t.Print(); t.Reset(); t.Start(); + RooMsgService::instance().setGlobalKillBelow(msglevel); + + c->cd(3); + HypoTestPlot *p2 = new HypoTestPlot(*r2,30); // 30 bins + p2->Draw(); + + //////////////////////////////////////////////////////////////////////////// + // P A R T 6 : U S I N G H Y B R I D C A L C U L A T O R W I T H + // A N A L T E R N A T I V E T E S T S T A T I S T I C + // A N D S I M U L T A N E O U S M O D E L + ///////////////////////////////////////////////////////////////////////////// + // + // If one wants to use a test statistic in which the nuisance parameters + // are profiled (in one way or another), then the PDF must constrain b. 
+ // Otherwise any observation x can always be explained with s=0 and b=x/tau. + // + // In this case, one is really thinking about the problem in a + // different way. They are considering x,y simultaneously. + // and the PDF should be Pois(x | s+b) * Pois(y | tau b ) + // and the set 'obs' should be {x,y}. + + w->defineSet("obsXY","x,y"); + + // create a toy dataset with the x=150, y=100 + w->var("x")->setVal(150.); + w->var("y")->setVal(100.); + RooDataSet *dataXY = new RooDataSet("dXY", "dXY", *w->set("obsXY")); + dataXY->add(*w->set("obsXY")); + + // now we need new model configs, with PDF="model" + ModelConfig b_modelXY("B_modelXY", w); + b_modelXY.SetPdf(*w->pdf("model")); // IMPORTANT + b_modelXY.SetObservables(*w->set("obsXY")); + b_modelXY.SetParametersOfInterest(*w->set("poi")); + w->var("s")->setVal(0.0); // IMPORTANT + b_modelXY.SetSnapshot(*w->set("poi")); + + // create the alternate (signal+background) ModelConfig with s=50 + ModelConfig sb_modelXY("S+B_modelXY", w); + sb_modelXY.SetPdf(*w->pdf("model")); // IMPORTANT + sb_modelXY.SetObservables(*w->set("obsXY")); + sb_modelXY.SetParametersOfInterest(*w->set("poi")); + w->var("s")->setVal(50.0); // IMPORTANT + sb_modelXY.SetSnapshot(*w->set("poi")); + + // without this print, their can be a crash when using PROOF. Strange. + // w->Print(); + + // Test statistics like the profile likelihood ratio + // (or the ratio of profiled likelihoods (Tevatron) or the MLE for s) + // will now work, since the nuisance parameter b is constrained by y. + // ratio of alt and null likelihoods with background yield profiled. 
+ // + // NOTE: These are slower because they have to run fits for each toy + + // Tevatron-style Ratio of profiled likelihoods + // Q_Tev = -log L(s=0,\hat\hat{b})/L(s=50,\hat\hat{b}) + RatioOfProfiledLikelihoodsTestStat + ropl(*b_modelXY.GetPdf(), *sb_modelXY.GetPdf(), sb_modelXY.GetSnapshot()); + ropl.SetSubtractMLE(false); + + // profile likelihood where alternate is best fit value of signal yield + // \lambda(0) = -log L(s=0,\hat\hat{b})/L(\hat{s},\hat{b}) + ProfileLikelihoodTestStat profll(*b_modelXY.GetPdf()); + + // just use the maximum likelihood estimate of signal yield + // MLE = \hat{s} + MaxLikelihoodEstimateTestStat mlets(*sb_modelXY.GetPdf(), *w->var("s")); + + // However, it is less clear how to justify the prior used in randomizing + // the nuisance parameters (since that is a property of the ensemble, + // and y is a property of each toy pseudo experiment. In that case, + // one probably wants to consider a different y0 which will be held + // constant and the prior \pi(b) = Pois(y0 | tau b) * \eta(b). 
+ w->factory("y0[100]"); + w->factory("Gamma::gamma_y0(b,sum::temp0(y0,1),1,0)"); + w->factory("Gaussian::gauss_prior_y0(b,y0, expr::sqrty0('sqrt(y0)',y0))"); + + + // HYBRID CALCULATOR + HybridCalculator hc3(*dataXY, sb_modelXY, b_modelXY); + ToyMCSampler *toymcs3 = (ToyMCSampler*)hc3.GetTestStatSampler(); + toymcs3->SetNEventsPerToy(1); + toymcs3->SetTestStatistic(&slrts); + hc3.SetToys(30000,1000); + hc3.ForcePriorNuisanceAlt(*w->pdf("gamma_y0")); + hc3.ForcePriorNuisanceNull(*w->pdf("gamma_y0")); + // if you wanted to use the ad hoc Gaussian prior instead + // hc3.ForcePriorNuisanceAlt(*w->pdf("gauss_prior_y0")); + // hc3.ForcePriorNuisanceNull(*w->pdf("gauss_prior_y0")); + + // choose fit-based test statistic + toymcs3->SetTestStatistic(&profll); + //toymcs3->SetTestStatistic(&ropl); + //toymcs3->SetTestStatistic(&mlets); + + // enable proof + if(pc) toymcs3->SetProofConfig(pc); + + // these lines save current msg level and then kill any messages below ERROR + RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); + // Get the result + HypoTestResult *r3 = hc3.GetHypoTest(); + cout << "-----------------------------------------"<Print(); + t.Stop(); t.Print(); t.Reset(); t.Start(); + RooMsgService::instance().setGlobalKillBelow(msglevel); + + c->cd(4); + c->GetPad(4)->SetLogy(); + HypoTestPlot *p3 = new HypoTestPlot(*r3,50); // 50 bins + p3->Draw(); + + c->SaveAs("zbi.pdf"); + + + /////////////////////////////////////////////////////////// + // OUTPUT W/O PROOF (2.66 GHz Intel Core i7) + /////////////////////////////////////////////////////////// + + /* + ----------------------------------------- + Part 2 + Hybrid p-value from direct integration = 0.00094165 + Z_Gamma Significance = 3.10804 + ----------------------------------------- + Part 3 + Z_Bi p-value (analytic): 0.00094165 + Z_Bi significance (analytic): 3.10804 + Real time 0:00:00, CP time 0.610 + + ----------------------------------------- + Part 4 + Results HybridCalculator_result: + - Null 
p-value = 0.00115 +/- 0.000228984 + - Significance = 3.04848 sigma + - Number of S+B toys: 1000 + - Number of B toys: 20000 + - Test statistic evaluated on data: 150 + - CL_b: 0.99885 +/- 0.000239654 + - CL_s+b: 0.476 +/- 0.0157932 + - CL_s: 0.476548 +/- 0.0158118 + Real time 0:00:07, CP time 7.620 + + ----------------------------------------- + Part 5 + Results HybridCalculator_result: + - Null p-value = 0.0009 +/- 0.000206057 + - Significance = 3.12139 sigma + - Number of S+B toys: 1000 + - Number of B toys: 20000 + - Test statistic evaluated on data: 10.8198 + - CL_b: 0.9991 +/- 0.000212037 + - CL_s+b: 0.465 +/- 0.0157726 + - CL_s: 0.465419 +/- 0.0157871 + Real time 0:00:34, CP time 34.360 + + ----------------------------------------- + Part 6 + Results HybridCalculator_result: + - Null p-value = 0.000666667 +/- 0.000149021 + - Significance = 3.20871 sigma + - Number of S+B toys: 1000 + - Number of B toys: 30000 + - Test statistic evaluated on data: 5.03388 + - CL_b: 0.999333 +/- 0.000149021 + - CL_s+b: 0.511 +/- 0.0158076 + - CL_s: 0.511341 +/- 0.0158183 + Real time 0:05:06, CP time 306.330 */ - ////////////////////////////////////////// - // Comparison - /////////////////////////////////////////// - // LEPStatToolsForLHC - // https://plone4.fnal.gov:4430/P0/phystat/packages/0703002 - // Uses Gaussian prior - // CL_b = 6.218476e-04, Significance = 3.228665 sigma - // - ////////////////////////////////////////// - // Comparison - /////////////////////////////////////////// - // Asymptotics - // From the value of the profile likelihood ratio (5.0338) - // The significance can be estimated using Wilks's theorem - // significance = sqrt(2*profileLR) = 3.1729 sigma + + + /////////////////////////////////////////////////////////// + // OUTPUT w/ PROOF (2.66 GHz Intel Core i7, 4 virtual cores) + /////////////////////////////////////////////////////////// + /* + ----------------------------------------- + Part 5 + Results HybridCalculator_result: + - Null p-value = 
0.00075 +/- 0.000173124 + - Significance = 3.17468 sigma + - Number of S+B toys: 1000 + - Number of B toys: 20000 + - Test statistic evaluated on data: 10.8198 + - CL_b: 0.99925 +/- 0.000193577 + - CL_s+b: 0.454 +/- 0.0157443 + - CL_s: 0.454341 +/- 0.0157564 + Real time 0:00:16, CP time 0.990 + + ----------------------------------------- + Part 6 + Results HybridCalculator_result: + - Null p-value = 0.0007 +/- 0.000152699 + - Significance = 3.19465 sigma + - Number of S+B toys: 1000 + - Number of B toys: 30000 + - Test statistic evaluated on data: 5.03388 + - CL_b: 0.9993 +/- 0.000152699 + - CL_s+b: 0.518 +/- 0.0158011 + - CL_s: 0.518363 +/- 0.0158124 + Real time 0:01:25, CP time 0.580 + + */ + + ////////////////////////////////////////// + // Comparison + /////////////////////////////////////////// + // LEPStatToolsForLHC + // https://plone4.fnal.gov:4430/P0/phystat/packages/0703002 + // Uses Gaussian prior + // CL_b = 6.218476e-04, Significance = 3.228665 sigma + // + ////////////////////////////////////////// + // Comparison + /////////////////////////////////////////// + // Asymptotics + // From the value of the profile likelihood ratio (5.0338) + // The significance can be estimated using Wilks's theorem + // significance = sqrt(2*profileLR) = 3.1729 sigma } diff --git a/tutorials/roostats/HybridOriginalDemo.C b/tutorials/roostats/HybridOriginalDemo.C index abd64b0c8a8eb..5c1fb717595da 100644 --- a/tutorials/roostats/HybridOriginalDemo.C +++ b/tutorials/roostats/HybridOriginalDemo.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// Example on how to use the HybridCalculatorOriginal class /// /// With this example, you should get: CL_sb = 0.130 and CL_b = 0.946 @@ -20,11 +21,6 @@ #include "RooDataSet.h" #include "RooExtendPdf.h" #include "RooConstVar.h" - -#ifndef __CINT__ // problem including this file with CINT -#include "RooGlobalFunc.h" -#endif - #include "RooStats/HybridCalculatorOriginal.h" #include "RooStats/HybridResult.h" 
#include "RooStats/HybridPlot.h" diff --git a/tutorials/roostats/HybridStandardForm.C b/tutorials/roostats/HybridStandardForm.C index 389173fba7254..6630c36e4aa4f 100644 --- a/tutorials/roostats/HybridStandardForm.C +++ b/tutorials/roostats/HybridStandardForm.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// A hypothesis testing example based on number counting with background uncertainty. /// /// A hypothesis testing example based on number counting @@ -138,303 +139,303 @@ ClassImp(BinCountTestStat) void HybridStandardForm() { - // This tutorial has 6 parts - // Table of Contents - // Setup - // 1. Make the model for the 'prototype problem' - // Special cases - // 2. NOT RELEVANT HERE - // 3. Use RooStats analytic solution for this problem - // RooStats HybridCalculator -- can be generalized - // 4. RooStats ToyMC version of 2. & 3. - // 5. RooStats ToyMC with an equivalent test statistic - // 6. RooStats ToyMC with simultaneous control & main measurement - - // Part 4 takes ~4 min without PROOF. - // Part 5 takes about ~2 min with PROOF on 4 cores. - // Of course, everything looks nicer with more toys, which takes longer. - - - TStopwatch t; - t.Start(); - TCanvas *c = new TCanvas; - c->Divide(2,2); - - /////////////////////////////////////////////////////// - // P A R T 1 : D I R E C T I N T E G R A T I O N - ////////////////////////////////////////////////////// - // Make model for prototype on/off problem - // Pois(x | s+b) * Pois(y | tau b ) - // for Z_Gamma, use uniform prior on b. - RooWorkspace* w = new RooWorkspace("w"); - - - // replace the pdf in 'number couting form' - //w->factory("Poisson::px(x[150,0,500],sum::splusb(s[0,0,100],b[100,0,300]))"); - // with one in standard form. 
Now x is encoded in event count - w->factory("Uniform::f(m[0,1])");//m is a dummy discriminanting variable - w->factory("ExtendPdf::px(f,sum::splusb(s[0,0,100],b[100,0,300]))"); - w->factory("Poisson::py(y[100,0,500],prod::taub(tau[1.],b))"); - w->factory("PROD::model(px,py)"); - w->factory("Uniform::prior_b(b)"); - - // We will control the output level in a few places to avoid - // verbose progress messages. We start by keeping track - // of the current threshold on messages. - RooFit::MsgLevel msglevel = RooMsgService::instance().globalKillBelow(); - - // Use PROOF-lite on multi-core machines - ProofConfig* pc = NULL; - // uncomment below if you want to use PROOF - pc = new ProofConfig(*w, 4, "workers=4", kFALSE); // machine with 4 cores - // pc = new ProofConfig(*w, 2, "workers=2", kFALSE); // machine with 2 cores - - ///////////////////////////////////////////////// - // P A R T 3 : A N A L Y T I C R E S U L T - ///////////////////////////////////////////////// - // In this special case, the integrals are known analytically - // and they are implemented in RooStats::NumberCountingUtils - - // analytic Z_Bi - double p_Bi = NumberCountingUtils::BinomialWithTauObsP(150, 100, 1); - double Z_Bi = NumberCountingUtils::BinomialWithTauObsZ(150, 100, 1); - cout << "-----------------------------------------"<defineSet("obs","m"); - w->defineSet("poi","s"); - - // create a toy dataset with the x=150 - // RooDataSet *data = new RooDataSet("d", "d", *w->set("obs")); - // data->add(*w->set("obs")); - RooDataSet* data = w->pdf("px")->generate(*w->set("obs"),150); - - ////////////////////////////////////////////////////////// - // Part 3a : Setup ModelConfigs - // create the null (background-only) ModelConfig with s=0 - ModelConfig b_model("B_model", w); - b_model.SetPdf(*w->pdf("px")); - b_model.SetObservables(*w->set("obs")); - b_model.SetParametersOfInterest(*w->set("poi")); - w->var("s")->setVal(0.0); // important! 
- b_model.SetSnapshot(*w->set("poi")); - - // create the alternate (signal+background) ModelConfig with s=50 - ModelConfig sb_model("S+B_model", w); - sb_model.SetPdf(*w->pdf("px")); - sb_model.SetObservables(*w->set("obs")); - sb_model.SetParametersOfInterest(*w->set("poi")); - w->var("s")->setVal(50.0); // important! - sb_model.SetSnapshot(*w->set("poi")); - - - ////////////////////////////////////////////////////////// - // Part 3b : Choose Test Statistic - // To make an equivalent calculation we need to use x as the test - // statistic. This is not a built-in test statistic in RooStats - // so we define it above. The new class inherits from the - // RooStats::TestStatistic interface, and simply returns the value - // of x in the dataset. - - NumEventsTestStat eventCount(*w->pdf("px")); - - ////////////////////////////////////////////////////////// - // Part 3c : Define Prior used to randomize nuisance parameters - // - // The prior used for the hybrid calculator is the posterior - // from the auxiliary measurement y. The model for the aux. - // measurement is Pois(y|tau*b), thus the likleihood function - // is proportional to (has the form of) a Gamma distribution. - // if the 'original prior' \eta(b) is uniform, then from - // Bayes's theorem we have the posterior: - // \pi(b) = Pois(y|tau*b) * \eta(b) - // If \eta(b) is flat, then we arrive at a Gamma distribution. - // Since RooFit will normalize the PDF we can actually supply - // py=Pois(y,tau*b) that will be equivalent to multiplying by a uniform. - // - // Alternatively, we could explicitly use a gamma distribution: - // w->factory("Gamma::gamma(b,sum::temp(y,1),1,0)"); - // - // or we can use some other ad hoc prior that do not naturally - // follow from the known form of the auxiliary measurement. - // The common choice is the equivlaent Gaussian: - w->factory("Gaussian::gauss_prior(b,y, expr::sqrty('sqrt(y)',y))"); - // this corresponds to the "Z_N" calculation. 
- // - // or one could use the analogous log-normal prior - w->factory("Lognormal::lognorm_prior(b,y, expr::kappa('1+1./sqrt(y)',y))"); - // - // Ideally, the HybridCalculator would be able to inspect the full - // model Pois(x | s+b) * Pois(y | tau b ) and be given the original - // prior \eta(b) to form \pi(b) = Pois(y|tau*b) * \eta(b). - // This is not yet implemented because in the general case - // it is not easy to identify the terms in the PDF that correspond - // to the auxiliary measurement. So for now, it must be set - // explicitly with: - // - ForcePriorNuisanceNull() - // - ForcePriorNuisanceAlt() - // the name "ForcePriorNuisance" was chosen because we anticipate - // this to be auto-detected, but will leave the option open - // to force to a different prior for the nuisance parameters. - - ////////////////////////////////////////////////////////// - // Part 3d : Construct and configure the HybridCalculator - - HybridCalculator hc1(*data, sb_model, b_model); - ToyMCSampler *toymcs1 = (ToyMCSampler*)hc1.GetTestStatSampler(); - // toymcs1->SetNEventsPerToy(1); // because the model is in number counting form - toymcs1->SetTestStatistic(&eventCount); // set the test statistic - // toymcs1->SetGenerateBinned(); - hc1.SetToys(30000,1000); - hc1.ForcePriorNuisanceAlt(*w->pdf("py")); - hc1.ForcePriorNuisanceNull(*w->pdf("py")); - // if you wanted to use the ad hoc Gaussian prior instead - // hc1.ForcePriorNuisanceAlt(*w->pdf("gauss_prior")); - // hc1.ForcePriorNuisanceNull(*w->pdf("gauss_prior")); - // if you wanted to use the ad hoc log-normal prior instead - // hc1.ForcePriorNuisanceAlt(*w->pdf("lognorm_prior")); - // hc1.ForcePriorNuisanceNull(*w->pdf("lognorm_prior")); - - // enable proof - // proof not enabled for this test statistic - // if(pc) toymcs1->SetProofConfig(pc); - - // these lines save current msg level and then kill any messages below ERROR - RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); - // Get the result - HypoTestResult 
*r1 = hc1.GetHypoTest(); - RooMsgService::instance().setGlobalKillBelow(msglevel); // set it back - cout << "-----------------------------------------"<Print(); - t.Stop(); t.Print(); t.Reset(); t.Start(); - - c->cd(2); - HypoTestPlot *p1 = new HypoTestPlot(*r1,30); // 30 bins, TS is discrete - p1->Draw(); - - return; // keep the running time sort by default - //////////////////////////////////////////////////////////////////////////// - // P A R T 5 : U S I N G H Y B R I D C A L C U L A T O R W I T H - // A N A L T E R N A T I V E T E S T S T A T I S T I C - ///////////////////////////////////////////////////////////////////////////// - // - // A likelihood ratio test statistics should be 1-to-1 with the count x - // when the value of b is fixed in the likelihood. This is implemented - // by the SimpleLikelihoodRatioTestStat - - SimpleLikelihoodRatioTestStat slrts(*b_model.GetPdf(),*sb_model.GetPdf()); - slrts.SetNullParameters(*b_model.GetSnapshot()); - slrts.SetAltParameters(*sb_model.GetSnapshot()); - - // HYBRID CALCULATOR - HybridCalculator hc2(*data, sb_model, b_model); - ToyMCSampler *toymcs2 = (ToyMCSampler*)hc2.GetTestStatSampler(); - // toymcs2->SetNEventsPerToy(1); - toymcs2->SetTestStatistic(&slrts); - // toymcs2->SetGenerateBinned(); - hc2.SetToys(20000,1000); - hc2.ForcePriorNuisanceAlt(*w->pdf("py")); - hc2.ForcePriorNuisanceNull(*w->pdf("py")); - // if you wanted to use the ad hoc Gaussian prior instead - // hc2.ForcePriorNuisanceAlt(*w->pdf("gauss_prior")); - // hc2.ForcePriorNuisanceNull(*w->pdf("gauss_prior")); - // if you wanted to use the ad hoc log-normal prior instead - // hc2.ForcePriorNuisanceAlt(*w->pdf("lognorm_prior")); - // hc2.ForcePriorNuisanceNull(*w->pdf("lognorm_prior")); - - // enable proof - if(pc) toymcs2->SetProofConfig(pc); - - // these lines save current msg level and then kill any messages below ERROR - RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); - // Get the result - HypoTestResult *r2 = hc2.GetHypoTest(); 
- cout << "-----------------------------------------"<Print(); - t.Stop(); t.Print(); t.Reset(); t.Start(); - RooMsgService::instance().setGlobalKillBelow(msglevel); - - c->cd(3); - HypoTestPlot *p2 = new HypoTestPlot(*r2,30); // 30 bins - p2->Draw(); - - return; // so standard tutorial runs faster - - /////////////////////////////////////////////////////////// - // OUTPUT W/O PROOF (2.66 GHz Intel Core i7) - /////////////////////////////////////////////////////////// - - /* ------------------------------------------ -Part 3 -Z_Bi p-value (analytic): 0.00094165 -Z_Bi significance (analytic): 3.10804 -Real time 0:00:00, CP time 0.610 - -Results HybridCalculator_result: - - Null p-value = 0.00103333 +/- 0.000179406 - - Significance = 3.08048 sigma - - Number of S+B toys: 1000 - - Number of B toys: 30000 - - Test statistic evaluated on data: 150 - - CL_b: 0.998967 +/- 0.000185496 - - CL_s+b: 0.495 +/- 0.0158106 - - CL_s: 0.495512 +/- 0.0158272 -Real time 0:04:43, CP time 283.780 - - */ - /* With PROOF ------------------------------------------ -Part 5 - -Results HybridCalculator_result: - - Null p-value = 0.00105 +/- 0.000206022 - - Significance = 3.07571 sigma - - Number of S+B toys: 1000 - - Number of B toys: 20000 - - Test statistic evaluated on data: 10.8198 - - CL_b: 0.99895 +/- 0.000229008 - - CL_s+b: 0.491 +/- 0.0158088 - - CL_s: 0.491516 +/- 0.0158258 -Real time 0:02:22, CP time 0.990 - */ - - ////////////////////////////////////////// - // Comparison - /////////////////////////////////////////// - // LEPStatToolsForLHC - // https://plone4.fnal.gov:4430/P0/phystat/packages/0703002 - // Uses Gaussian prior - // CL_b = 6.218476e-04, Significance = 3.228665 sigma - // - ////////////////////////////////////////// - // Comparison - /////////////////////////////////////////// - // Asymptotics - // From the value of the profile likelihood ratio (5.0338) - // The significance can be estimated using Wilks's theorem - // significance = sqrt(2*profileLR) = 3.1729 sigma + 
// This tutorial has 6 parts + // Table of Contents + // Setup + // 1. Make the model for the 'prototype problem' + // Special cases + // 2. NOT RELEVANT HERE + // 3. Use RooStats analytic solution for this problem + // RooStats HybridCalculator -- can be generalized + // 4. RooStats ToyMC version of 2. & 3. + // 5. RooStats ToyMC with an equivalent test statistic + // 6. RooStats ToyMC with simultaneous control & main measurement + + // Part 4 takes ~4 min without PROOF. + // Part 5 takes about ~2 min with PROOF on 4 cores. + // Of course, everything looks nicer with more toys, which takes longer. + + + TStopwatch t; + t.Start(); + TCanvas *c = new TCanvas; + c->Divide(2,2); + + /////////////////////////////////////////////////////// + // P A R T 1 : D I R E C T I N T E G R A T I O N + ////////////////////////////////////////////////////// + // Make model for prototype on/off problem + // Pois(x | s+b) * Pois(y | tau b ) + // for Z_Gamma, use uniform prior on b. + RooWorkspace* w = new RooWorkspace("w"); + + + // replace the pdf in 'number couting form' + //w->factory("Poisson::px(x[150,0,500],sum::splusb(s[0,0,100],b[100,0,300]))"); + // with one in standard form. Now x is encoded in event count + w->factory("Uniform::f(m[0,1])");//m is a dummy discriminanting variable + w->factory("ExtendPdf::px(f,sum::splusb(s[0,0,100],b[100,0,300]))"); + w->factory("Poisson::py(y[100,0,500],prod::taub(tau[1.],b))"); + w->factory("PROD::model(px,py)"); + w->factory("Uniform::prior_b(b)"); + + // We will control the output level in a few places to avoid + // verbose progress messages. We start by keeping track + // of the current threshold on messages. 
+ RooFit::MsgLevel msglevel = RooMsgService::instance().globalKillBelow(); + + // Use PROOF-lite on multi-core machines + ProofConfig* pc = NULL; + // uncomment below if you want to use PROOF + pc = new ProofConfig(*w, 4, "workers=4", kFALSE); // machine with 4 cores + // pc = new ProofConfig(*w, 2, "workers=2", kFALSE); // machine with 2 cores + + ///////////////////////////////////////////////// + // P A R T 3 : A N A L Y T I C R E S U L T + ///////////////////////////////////////////////// + // In this special case, the integrals are known analytically + // and they are implemented in RooStats::NumberCountingUtils + + // analytic Z_Bi + double p_Bi = NumberCountingUtils::BinomialWithTauObsP(150, 100, 1); + double Z_Bi = NumberCountingUtils::BinomialWithTauObsZ(150, 100, 1); + cout << "-----------------------------------------"<defineSet("obs","m"); + w->defineSet("poi","s"); + + // create a toy dataset with the x=150 + // RooDataSet *data = new RooDataSet("d", "d", *w->set("obs")); + // data->add(*w->set("obs")); + RooDataSet* data = w->pdf("px")->generate(*w->set("obs"),150); + + ////////////////////////////////////////////////////////// + // Part 3a : Setup ModelConfigs + // create the null (background-only) ModelConfig with s=0 + ModelConfig b_model("B_model", w); + b_model.SetPdf(*w->pdf("px")); + b_model.SetObservables(*w->set("obs")); + b_model.SetParametersOfInterest(*w->set("poi")); + w->var("s")->setVal(0.0); // important! + b_model.SetSnapshot(*w->set("poi")); + + // create the alternate (signal+background) ModelConfig with s=50 + ModelConfig sb_model("S+B_model", w); + sb_model.SetPdf(*w->pdf("px")); + sb_model.SetObservables(*w->set("obs")); + sb_model.SetParametersOfInterest(*w->set("poi")); + w->var("s")->setVal(50.0); // important! 
+ sb_model.SetSnapshot(*w->set("poi")); + + + ////////////////////////////////////////////////////////// + // Part 3b : Choose Test Statistic + // To make an equivalent calculation we need to use x as the test + // statistic. This is not a built-in test statistic in RooStats + // so we define it above. The new class inherits from the + // RooStats::TestStatistic interface, and simply returns the value + // of x in the dataset. + + NumEventsTestStat eventCount(*w->pdf("px")); + + ////////////////////////////////////////////////////////// + // Part 3c : Define Prior used to randomize nuisance parameters + // + // The prior used for the hybrid calculator is the posterior + // from the auxiliary measurement y. The model for the aux. + // measurement is Pois(y|tau*b), thus the likleihood function + // is proportional to (has the form of) a Gamma distribution. + // if the 'original prior' \eta(b) is uniform, then from + // Bayes's theorem we have the posterior: + // \pi(b) = Pois(y|tau*b) * \eta(b) + // If \eta(b) is flat, then we arrive at a Gamma distribution. + // Since RooFit will normalize the PDF we can actually supply + // py=Pois(y,tau*b) that will be equivalent to multiplying by a uniform. + // + // Alternatively, we could explicitly use a gamma distribution: + // w->factory("Gamma::gamma(b,sum::temp(y,1),1,0)"); + // + // or we can use some other ad hoc prior that do not naturally + // follow from the known form of the auxiliary measurement. + // The common choice is the equivlaent Gaussian: + w->factory("Gaussian::gauss_prior(b,y, expr::sqrty('sqrt(y)',y))"); + // this corresponds to the "Z_N" calculation. + // + // or one could use the analogous log-normal prior + w->factory("Lognormal::lognorm_prior(b,y, expr::kappa('1+1./sqrt(y)',y))"); + // + // Ideally, the HybridCalculator would be able to inspect the full + // model Pois(x | s+b) * Pois(y | tau b ) and be given the original + // prior \eta(b) to form \pi(b) = Pois(y|tau*b) * \eta(b). 
+ // This is not yet implemented because in the general case + // it is not easy to identify the terms in the PDF that correspond + // to the auxiliary measurement. So for now, it must be set + // explicitly with: + // - ForcePriorNuisanceNull() + // - ForcePriorNuisanceAlt() + // the name "ForcePriorNuisance" was chosen because we anticipate + // this to be auto-detected, but will leave the option open + // to force to a different prior for the nuisance parameters. + + ////////////////////////////////////////////////////////// + // Part 3d : Construct and configure the HybridCalculator + + HybridCalculator hc1(*data, sb_model, b_model); + ToyMCSampler *toymcs1 = (ToyMCSampler*)hc1.GetTestStatSampler(); + // toymcs1->SetNEventsPerToy(1); // because the model is in number counting form + toymcs1->SetTestStatistic(&eventCount); // set the test statistic + // toymcs1->SetGenerateBinned(); + hc1.SetToys(30000,1000); + hc1.ForcePriorNuisanceAlt(*w->pdf("py")); + hc1.ForcePriorNuisanceNull(*w->pdf("py")); + // if you wanted to use the ad hoc Gaussian prior instead + // hc1.ForcePriorNuisanceAlt(*w->pdf("gauss_prior")); + // hc1.ForcePriorNuisanceNull(*w->pdf("gauss_prior")); + // if you wanted to use the ad hoc log-normal prior instead + // hc1.ForcePriorNuisanceAlt(*w->pdf("lognorm_prior")); + // hc1.ForcePriorNuisanceNull(*w->pdf("lognorm_prior")); + + // enable proof + // proof not enabled for this test statistic + // if(pc) toymcs1->SetProofConfig(pc); + + // these lines save current msg level and then kill any messages below ERROR + RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); + // Get the result + HypoTestResult *r1 = hc1.GetHypoTest(); + RooMsgService::instance().setGlobalKillBelow(msglevel); // set it back + cout << "-----------------------------------------"<Print(); + t.Stop(); t.Print(); t.Reset(); t.Start(); + + c->cd(2); + HypoTestPlot *p1 = new HypoTestPlot(*r1,30); // 30 bins, TS is discrete + p1->Draw(); + + return; // keep the running 
time sort by default + //////////////////////////////////////////////////////////////////////////// + // P A R T 5 : U S I N G H Y B R I D C A L C U L A T O R W I T H + // A N A L T E R N A T I V E T E S T S T A T I S T I C + ///////////////////////////////////////////////////////////////////////////// + // + // A likelihood ratio test statistics should be 1-to-1 with the count x + // when the value of b is fixed in the likelihood. This is implemented + // by the SimpleLikelihoodRatioTestStat + + SimpleLikelihoodRatioTestStat slrts(*b_model.GetPdf(),*sb_model.GetPdf()); + slrts.SetNullParameters(*b_model.GetSnapshot()); + slrts.SetAltParameters(*sb_model.GetSnapshot()); + + // HYBRID CALCULATOR + HybridCalculator hc2(*data, sb_model, b_model); + ToyMCSampler *toymcs2 = (ToyMCSampler*)hc2.GetTestStatSampler(); + // toymcs2->SetNEventsPerToy(1); + toymcs2->SetTestStatistic(&slrts); + // toymcs2->SetGenerateBinned(); + hc2.SetToys(20000,1000); + hc2.ForcePriorNuisanceAlt(*w->pdf("py")); + hc2.ForcePriorNuisanceNull(*w->pdf("py")); + // if you wanted to use the ad hoc Gaussian prior instead + // hc2.ForcePriorNuisanceAlt(*w->pdf("gauss_prior")); + // hc2.ForcePriorNuisanceNull(*w->pdf("gauss_prior")); + // if you wanted to use the ad hoc log-normal prior instead + // hc2.ForcePriorNuisanceAlt(*w->pdf("lognorm_prior")); + // hc2.ForcePriorNuisanceNull(*w->pdf("lognorm_prior")); + + // enable proof + if(pc) toymcs2->SetProofConfig(pc); + + // these lines save current msg level and then kill any messages below ERROR + RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); + // Get the result + HypoTestResult *r2 = hc2.GetHypoTest(); + cout << "-----------------------------------------"<Print(); + t.Stop(); t.Print(); t.Reset(); t.Start(); + RooMsgService::instance().setGlobalKillBelow(msglevel); + + c->cd(3); + HypoTestPlot *p2 = new HypoTestPlot(*r2,30); // 30 bins + p2->Draw(); + + return; // so standard tutorial runs faster + + 
/////////////////////////////////////////////////////////// + // OUTPUT W/O PROOF (2.66 GHz Intel Core i7) + /////////////////////////////////////////////////////////// + + /* + ----------------------------------------- + Part 3 + Z_Bi p-value (analytic): 0.00094165 + Z_Bi significance (analytic): 3.10804 + Real time 0:00:00, CP time 0.610 + + Results HybridCalculator_result: + - Null p-value = 0.00103333 +/- 0.000179406 + - Significance = 3.08048 sigma + - Number of S+B toys: 1000 + - Number of B toys: 30000 + - Test statistic evaluated on data: 150 + - CL_b: 0.998967 +/- 0.000185496 + - CL_s+b: 0.495 +/- 0.0158106 + - CL_s: 0.495512 +/- 0.0158272 + Real time 0:04:43, CP time 283.780 + + */ + /* With PROOF + ----------------------------------------- + Part 5 + + Results HybridCalculator_result: + - Null p-value = 0.00105 +/- 0.000206022 + - Significance = 3.07571 sigma + - Number of S+B toys: 1000 + - Number of B toys: 20000 + - Test statistic evaluated on data: 10.8198 + - CL_b: 0.99895 +/- 0.000229008 + - CL_s+b: 0.491 +/- 0.0158088 + - CL_s: 0.491516 +/- 0.0158258 + Real time 0:02:22, CP time 0.990 + */ + + ////////////////////////////////////////// + // Comparison + /////////////////////////////////////////// + // LEPStatToolsForLHC + // https://plone4.fnal.gov:4430/P0/phystat/packages/0703002 + // Uses Gaussian prior + // CL_b = 6.218476e-04, Significance = 3.228665 sigma + // + ////////////////////////////////////////// + // Comparison + /////////////////////////////////////////// + // Asymptotics + // From the value of the profile likelihood ratio (5.0338) + // The significance can be estimated using Wilks's theorem + // significance = sqrt(2*profileLR) = 3.1729 sigma } diff --git a/tutorials/roostats/IntervalExamples.C b/tutorials/roostats/IntervalExamples.C index c23f2e0fccfde..bf966e5a292d8 100644 --- a/tutorials/roostats/IntervalExamples.C +++ b/tutorials/roostats/IntervalExamples.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook 
/// Example showing confidence intervals with four techniques. /// /// An example that shows confidence intervals with four techniques. @@ -53,165 +54,165 @@ #include // use this order for safety on library loading -using namespace RooFit ; -using namespace RooStats ; +using namespace RooFit; +using namespace RooStats; void IntervalExamples() { - // Time this macro - TStopwatch t; - t.Start(); - - - // set RooFit random seed for reproducible results - RooRandom::randomGenerator()->SetSeed(3001); - - // make a simple model via the workspace factory - RooWorkspace* wspace = new RooWorkspace(); - wspace->factory("Gaussian::normal(x[-10,10],mu[-1,1],sigma[1])"); - wspace->defineSet("poi","mu"); - wspace->defineSet("obs","x"); - - // specify components of model for statistical tools - ModelConfig* modelConfig = new ModelConfig("Example G(x|mu,1)"); - modelConfig->SetWorkspace(*wspace); - modelConfig->SetPdf( *wspace->pdf("normal") ); - modelConfig->SetParametersOfInterest( *wspace->set("poi") ); - modelConfig->SetObservables( *wspace->set("obs") ); - - // create a toy dataset - RooDataSet* data = wspace->pdf("normal")->generate(*wspace->set("obs"),100); - data->Print(); - - // for convenience later on - RooRealVar* x = wspace->var("x"); - RooRealVar* mu = wspace->var("mu"); - - // set confidence level - double confidenceLevel = 0.95; - - // example use profile likelihood calculator - ProfileLikelihoodCalculator plc(*data, *modelConfig); - plc.SetConfidenceLevel( confidenceLevel); - LikelihoodInterval* plInt = plc.GetInterval(); - - // example use of Feldman-Cousins - FeldmanCousins fc(*data, *modelConfig); - fc.SetConfidenceLevel( confidenceLevel); - fc.SetNBins(100); // number of points to test per parameter - fc.UseAdaptiveSampling(true); // make it go faster - - // Here, we consider only ensembles with 100 events - // The PDF could be extended and this could be removed - fc.FluctuateNumDataEntries(false); - - // Proof - // ProofConfig pc(*wspace, 4, "workers=4", 
kFALSE); // proof-lite - //ProofConfig pc(w, 8, "localhost"); // proof cluster at "localhost" - // ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); - // toymcsampler->SetProofConfig(&pc); // enable proof - - PointSetInterval* interval = (PointSetInterval*) fc.GetInterval(); - - - // example use of BayesianCalculator - // now we also need to specify a prior in the ModelConfig - wspace->factory("Uniform::prior(mu)"); - modelConfig->SetPriorPdf(*wspace->pdf("prior")); - - // example usage of BayesianCalculator - BayesianCalculator bc(*data, *modelConfig); - bc.SetConfidenceLevel( confidenceLevel); - SimpleInterval* bcInt = bc.GetInterval(); - - // example use of MCMCInterval - MCMCCalculator mc(*data, *modelConfig); - mc.SetConfidenceLevel( confidenceLevel); - // special options - mc.SetNumBins(200); // bins used internally for representing posterior - mc.SetNumBurnInSteps(500); // first N steps to be ignored as burn-in - mc.SetNumIters(100000); // how long to run chain - mc.SetLeftSideTailFraction(0.5); // for central interval - MCMCInterval* mcInt = mc.GetInterval(); - - // for this example we know the expected intervals - double expectedLL = data->mean(*x) - + ROOT::Math::normal_quantile( (1-confidenceLevel)/2,1) - / sqrt(data->numEntries()); - double expectedUL = data->mean(*x) - + ROOT::Math::normal_quantile_c((1-confidenceLevel)/2,1) - / sqrt(data->numEntries()) ; - - // Use the intervals - std::cout << "expected interval is [" << - expectedLL << ", " << - expectedUL << "]" << endl; - - cout << "plc interval is [" << - plInt->LowerLimit(*mu) << ", " << - plInt->UpperLimit(*mu) << "]" << endl; - - std::cout << "fc interval is ["<< - interval->LowerLimit(*mu) << " , " << - interval->UpperLimit(*mu) << "]" << endl; - - cout << "bc interval is [" << - bcInt->LowerLimit() << ", " << - bcInt->UpperLimit() << "]" << endl; - - cout << "mc interval is [" << - mcInt->LowerLimit(*mu) << ", " << - mcInt->UpperLimit(*mu) << "]" << endl; - - 
mu->setVal(0); - cout << "is mu=0 in the interval? " << - plInt->IsInInterval(RooArgSet(*mu)) << endl; - - - // make a reasonable style - gStyle->SetCanvasColor(0); - gStyle->SetCanvasBorderMode(0); - gStyle->SetPadBorderMode(0); - gStyle->SetPadColor(0); - gStyle->SetCanvasColor(0); - gStyle->SetTitleFillColor(0); - gStyle->SetFillColor(0); - gStyle->SetFrameFillColor(0); - gStyle->SetStatColor(0); - - - // some plots - TCanvas* canvas = new TCanvas("canvas"); - canvas->Divide(2,2); - - // plot the data - canvas->cd(1); - RooPlot* frame = x->frame(); - data->plotOn(frame); - data->statOn(frame); - frame->Draw(); - - // plot the profile likeihood - canvas->cd(2); - LikelihoodIntervalPlot plot(plInt); - plot.Draw(); - - // plot the MCMC interval - canvas->cd(3); - MCMCIntervalPlot* mcPlot = new MCMCIntervalPlot(*mcInt); - mcPlot->SetLineColor(kGreen); - mcPlot->SetLineWidth(2); - mcPlot->Draw(); - - canvas->cd(4); - RooPlot * bcPlot = bc.GetPosteriorPlot(); - bcPlot->Draw(); - - canvas->Update(); - - t.Stop(); - t.Print(); + // Time this macro + TStopwatch t; + t.Start(); + + + // set RooFit random seed for reproducible results + RooRandom::randomGenerator()->SetSeed(3001); + + // make a simple model via the workspace factory + RooWorkspace* wspace = new RooWorkspace(); + wspace->factory("Gaussian::normal(x[-10,10],mu[-1,1],sigma[1])"); + wspace->defineSet("poi","mu"); + wspace->defineSet("obs","x"); + + // specify components of model for statistical tools + ModelConfig* modelConfig = new ModelConfig("Example G(x|mu,1)"); + modelConfig->SetWorkspace(*wspace); + modelConfig->SetPdf( *wspace->pdf("normal") ); + modelConfig->SetParametersOfInterest( *wspace->set("poi") ); + modelConfig->SetObservables( *wspace->set("obs") ); + + // create a toy dataset + RooDataSet* data = wspace->pdf("normal")->generate(*wspace->set("obs"),100); + data->Print(); + + // for convenience later on + RooRealVar* x = wspace->var("x"); + RooRealVar* mu = wspace->var("mu"); + + // set 
confidence level + double confidenceLevel = 0.95; + + // example use profile likelihood calculator + ProfileLikelihoodCalculator plc(*data, *modelConfig); + plc.SetConfidenceLevel( confidenceLevel); + LikelihoodInterval* plInt = plc.GetInterval(); + + // example use of Feldman-Cousins + FeldmanCousins fc(*data, *modelConfig); + fc.SetConfidenceLevel( confidenceLevel); + fc.SetNBins(100); // number of points to test per parameter + fc.UseAdaptiveSampling(true); // make it go faster + + // Here, we consider only ensembles with 100 events + // The PDF could be extended and this could be removed + fc.FluctuateNumDataEntries(false); + + // Proof + // ProofConfig pc(*wspace, 4, "workers=4", kFALSE); // proof-lite + //ProofConfig pc(w, 8, "localhost"); // proof cluster at "localhost" + // ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); + // toymcsampler->SetProofConfig(&pc); // enable proof + + PointSetInterval* interval = (PointSetInterval*) fc.GetInterval(); + + + // example use of BayesianCalculator + // now we also need to specify a prior in the ModelConfig + wspace->factory("Uniform::prior(mu)"); + modelConfig->SetPriorPdf(*wspace->pdf("prior")); + + // example usage of BayesianCalculator + BayesianCalculator bc(*data, *modelConfig); + bc.SetConfidenceLevel( confidenceLevel); + SimpleInterval* bcInt = bc.GetInterval(); + + // example use of MCMCInterval + MCMCCalculator mc(*data, *modelConfig); + mc.SetConfidenceLevel( confidenceLevel); + // special options + mc.SetNumBins(200); // bins used internally for representing posterior + mc.SetNumBurnInSteps(500); // first N steps to be ignored as burn-in + mc.SetNumIters(100000); // how long to run chain + mc.SetLeftSideTailFraction(0.5); // for central interval + MCMCInterval* mcInt = mc.GetInterval(); + + // for this example we know the expected intervals + double expectedLL = data->mean(*x) + + ROOT::Math::normal_quantile( (1-confidenceLevel)/2,1) + / sqrt(data->numEntries()); + double expectedUL = 
data->mean(*x) + + ROOT::Math::normal_quantile_c((1-confidenceLevel)/2,1) + / sqrt(data->numEntries()) ; + + // Use the intervals + std::cout << "expected interval is [" << + expectedLL << ", " << + expectedUL << "]" << endl; + + cout << "plc interval is [" << + plInt->LowerLimit(*mu) << ", " << + plInt->UpperLimit(*mu) << "]" << endl; + + std::cout << "fc interval is ["<< + interval->LowerLimit(*mu) << " , " << + interval->UpperLimit(*mu) << "]" << endl; + + cout << "bc interval is [" << + bcInt->LowerLimit() << ", " << + bcInt->UpperLimit() << "]" << endl; + + cout << "mc interval is [" << + mcInt->LowerLimit(*mu) << ", " << + mcInt->UpperLimit(*mu) << "]" << endl; + + mu->setVal(0); + cout << "is mu=0 in the interval? " << + plInt->IsInInterval(RooArgSet(*mu)) << endl; + + + // make a reasonable style + gStyle->SetCanvasColor(0); + gStyle->SetCanvasBorderMode(0); + gStyle->SetPadBorderMode(0); + gStyle->SetPadColor(0); + gStyle->SetCanvasColor(0); + gStyle->SetTitleFillColor(0); + gStyle->SetFillColor(0); + gStyle->SetFrameFillColor(0); + gStyle->SetStatColor(0); + + + // some plots + TCanvas* canvas = new TCanvas("canvas"); + canvas->Divide(2,2); + + // plot the data + canvas->cd(1); + RooPlot* frame = x->frame(); + data->plotOn(frame); + data->statOn(frame); + frame->Draw(); + + // plot the profile likeihood + canvas->cd(2); + LikelihoodIntervalPlot plot(plInt); + plot.Draw(); + + // plot the MCMC interval + canvas->cd(3); + MCMCIntervalPlot* mcPlot = new MCMCIntervalPlot(*mcInt); + mcPlot->SetLineColor(kGreen); + mcPlot->SetLineWidth(2); + mcPlot->Draw(); + + canvas->cd(4); + RooPlot * bcPlot = bc.GetPosteriorPlot(); + bcPlot->Draw(); + + canvas->Update(); + + t.Stop(); + t.Print(); } diff --git a/tutorials/roostats/JeffreysPriorDemo.C b/tutorials/roostats/JeffreysPriorDemo.C index 41c13f48345f8..cd230cdabc6d8 100644 --- a/tutorials/roostats/JeffreysPriorDemo.C +++ b/tutorials/roostats/JeffreysPriorDemo.C @@ -1,5 +1,6 @@ /// \file /// \ingroup 
tutorial_roostats +/// \notebook -js /// tutorial demonstrating and validates the RooJeffreysPrior class /// /// Jeffreys's prior is an 'objective prior' based on formal rules. @@ -47,163 +48,163 @@ using namespace RooFit; void JeffreysPriorDemo(){ - RooWorkspace w("w"); - w.factory("Uniform::u(x[0,1])"); - w.factory("mu[100,1,200]"); - w.factory("ExtendPdf::p(u,mu)"); + RooWorkspace w("w"); + w.factory("Uniform::u(x[0,1])"); + w.factory("mu[100,1,200]"); + w.factory("ExtendPdf::p(u,mu)"); - RooDataHist* asimov = w.pdf("p")->generateBinned(*w.var("x"),ExpectedData()); + RooDataHist* asimov = w.pdf("p")->generateBinned(*w.var("x"),ExpectedData()); - RooFitResult* res = w.pdf("p")->fitTo(*asimov,Save(),SumW2Error(kTRUE)); + RooFitResult* res = w.pdf("p")->fitTo(*asimov,Save(),SumW2Error(kTRUE)); - asimov->Print(); - res->Print(); - TMatrixDSym cov = res->covarianceMatrix(); - cout << "variance = " << (cov.Determinant()) << endl; - cout << "stdev = " << sqrt(cov.Determinant()) << endl; - cov.Invert(); - cout << "jeffreys = " << sqrt(cov.Determinant()) << endl; + asimov->Print(); + res->Print(); + TMatrixDSym cov = res->covarianceMatrix(); + cout << "variance = " << (cov.Determinant()) << endl; + cout << "stdev = " << sqrt(cov.Determinant()) << endl; + cov.Invert(); + cout << "jeffreys = " << sqrt(cov.Determinant()) << endl; - w.defineSet("poi","mu"); - w.defineSet("obs","x"); + w.defineSet("poi","mu"); + w.defineSet("obs","x"); - RooJeffreysPrior pi("jeffreys","jeffreys",*w.pdf("p"),*w.set("poi"),*w.set("obs")); + RooJeffreysPrior pi("jeffreys","jeffreys",*w.pdf("p"),*w.set("poi"),*w.set("obs")); - RooGenericPdf* test = new RooGenericPdf("test","test","1./sqrt(mu)",*w.set("poi")); + RooGenericPdf* test = new RooGenericPdf("test","test","1./sqrt(mu)",*w.set("poi")); - TCanvas* c1 = new TCanvas; - RooPlot* plot = w.var("mu")->frame(); - pi.plotOn(plot); - test->plotOn(plot,LineColor(kRed)); - plot->Draw(); + TCanvas* c1 = new TCanvas; + RooPlot* plot = 
w.var("mu")->frame(); + pi.plotOn(plot); + test->plotOn(plot,LineColor(kRed)); + plot->Draw(); } //_________________________________________________ void TestJeffreysGaussMean(){ - RooWorkspace w("w"); - w.factory("Gaussian::g(x[0,-20,20],mu[0,-5,5],sigma[1,0,10])"); - w.factory("n[10,.1,200]"); - w.factory("ExtendPdf::p(g,n)"); - w.var("sigma")->setConstant(); - w.var("n")->setConstant(); + RooWorkspace w("w"); + w.factory("Gaussian::g(x[0,-20,20],mu[0,-5,5],sigma[1,0,10])"); + w.factory("n[10,.1,200]"); + w.factory("ExtendPdf::p(g,n)"); + w.var("sigma")->setConstant(); + w.var("n")->setConstant(); - RooDataHist* asimov = w.pdf("p")->generateBinned(*w.var("x"),ExpectedData()); + RooDataHist* asimov = w.pdf("p")->generateBinned(*w.var("x"),ExpectedData()); - RooFitResult* res = w.pdf("p")->fitTo(*asimov,Save(),SumW2Error(kTRUE)); + RooFitResult* res = w.pdf("p")->fitTo(*asimov,Save(),SumW2Error(kTRUE)); - asimov->Print(); - res->Print(); - TMatrixDSym cov = res->covarianceMatrix(); - cout << "variance = " << (cov.Determinant()) << endl; - cout << "stdev = " << sqrt(cov.Determinant()) << endl; - cov.Invert(); - cout << "jeffreys = " << sqrt(cov.Determinant()) << endl; + asimov->Print(); + res->Print(); + TMatrixDSym cov = res->covarianceMatrix(); + cout << "variance = " << (cov.Determinant()) << endl; + cout << "stdev = " << sqrt(cov.Determinant()) << endl; + cov.Invert(); + cout << "jeffreys = " << sqrt(cov.Determinant()) << endl; - w.defineSet("poi","mu"); - w.defineSet("obs","x"); + w.defineSet("poi","mu"); + w.defineSet("obs","x"); - RooJeffreysPrior pi("jeffreys","jeffreys",*w.pdf("p"),*w.set("poi"),*w.set("obs")); + RooJeffreysPrior pi("jeffreys","jeffreys",*w.pdf("p"),*w.set("poi"),*w.set("obs")); - const RooArgSet* temp = w.set("poi"); - pi.getParameters(*temp)->Print(); + const RooArgSet* temp = w.set("poi"); + pi.getParameters(*temp)->Print(); - // return; - RooGenericPdf* test = new RooGenericPdf("test","test","1",*w.set("poi")); + // return; + 
RooGenericPdf* test = new RooGenericPdf("test","test","1",*w.set("poi")); - TCanvas* c1 = new TCanvas; - RooPlot* plot = w.var("mu")->frame(); - pi.plotOn(plot); - test->plotOn(plot,LineColor(kRed),LineStyle(kDotted)); - plot->Draw(); + TCanvas* c1 = new TCanvas; + RooPlot* plot = w.var("mu")->frame(); + pi.plotOn(plot); + test->plotOn(plot,LineColor(kRed),LineStyle(kDotted)); + plot->Draw(); } //_________________________________________________ void TestJeffreysGaussSigma(){ - // this one is VERY sensitive - // if the Gaussian is narrow ~ range(x)/nbins(x) then the peak isn't resolved - // and you get really bizzare shapes - // if the Gaussian is too wide range(x) ~ sigma then PDF gets renormalized - // and the PDF falls off too fast at high sigma - RooWorkspace w("w"); - w.factory("Gaussian::g(x[0,-20,20],mu[0,-5,5],sigma[1,1,5])"); - w.factory("n[100,.1,2000]"); - w.factory("ExtendPdf::p(g,n)"); - // w.var("sigma")->setConstant(); - w.var("mu")->setConstant(); - w.var("n")->setConstant(); - w.var("x")->setBins(301); - - RooDataHist* asimov = w.pdf("p")->generateBinned(*w.var("x"),ExpectedData()); - - RooFitResult* res = w.pdf("p")->fitTo(*asimov,Save(),SumW2Error(kTRUE)); - - asimov->Print(); - res->Print(); - TMatrixDSym cov = res->covarianceMatrix(); - cout << "variance = " << (cov.Determinant()) << endl; - cout << "stdev = " << sqrt(cov.Determinant()) << endl; - cov.Invert(); - cout << "jeffreys = " << sqrt(cov.Determinant()) << endl; - - w.defineSet("poi","sigma"); - w.defineSet("obs","x"); - - RooJeffreysPrior pi("jeffreys","jeffreys",*w.pdf("p"),*w.set("poi"),*w.set("obs")); - pi.specialIntegratorConfig(kTRUE)->getConfigSection("RooIntegrator1D").setRealValue("maxSteps",3); - - const RooArgSet* temp = w.set("poi"); - pi.getParameters(*temp)->Print(); - - RooGenericPdf* test = new RooGenericPdf("test","test","sqrt(2.)/sigma",*w.set("poi")); - - TCanvas* c1 = new TCanvas; - RooPlot* plot = w.var("sigma")->frame(); - pi.plotOn(plot); - 
test->plotOn(plot,LineColor(kRed),LineStyle(kDotted)); - plot->Draw(); + // this one is VERY sensitive + // if the Gaussian is narrow ~ range(x)/nbins(x) then the peak isn't resolved + // and you get really bizzare shapes + // if the Gaussian is too wide range(x) ~ sigma then PDF gets renormalized + // and the PDF falls off too fast at high sigma + RooWorkspace w("w"); + w.factory("Gaussian::g(x[0,-20,20],mu[0,-5,5],sigma[1,1,5])"); + w.factory("n[100,.1,2000]"); + w.factory("ExtendPdf::p(g,n)"); + // w.var("sigma")->setConstant(); + w.var("mu")->setConstant(); + w.var("n")->setConstant(); + w.var("x")->setBins(301); + + RooDataHist* asimov = w.pdf("p")->generateBinned(*w.var("x"),ExpectedData()); + + RooFitResult* res = w.pdf("p")->fitTo(*asimov,Save(),SumW2Error(kTRUE)); + + asimov->Print(); + res->Print(); + TMatrixDSym cov = res->covarianceMatrix(); + cout << "variance = " << (cov.Determinant()) << endl; + cout << "stdev = " << sqrt(cov.Determinant()) << endl; + cov.Invert(); + cout << "jeffreys = " << sqrt(cov.Determinant()) << endl; + + w.defineSet("poi","sigma"); + w.defineSet("obs","x"); + + RooJeffreysPrior pi("jeffreys","jeffreys",*w.pdf("p"),*w.set("poi"),*w.set("obs")); + pi.specialIntegratorConfig(kTRUE)->getConfigSection("RooIntegrator1D").setRealValue("maxSteps",3); + + const RooArgSet* temp = w.set("poi"); + pi.getParameters(*temp)->Print(); + + RooGenericPdf* test = new RooGenericPdf("test","test","sqrt(2.)/sigma",*w.set("poi")); + + TCanvas* c1 = new TCanvas; + RooPlot* plot = w.var("sigma")->frame(); + pi.plotOn(plot); + test->plotOn(plot,LineColor(kRed),LineStyle(kDotted)); + plot->Draw(); } //_________________________________________________ void TestJeffreysGaussMeanAndSigma(){ - // this one is VERY sensitive - // if the Gaussian is narrow ~ range(x)/nbins(x) then the peak isn't resolved - // and you get really bizzare shapes - // if the Gaussian is too wide range(x) ~ sigma then PDF gets renormalized - // and the PDF falls off too fast at 
high sigma - RooWorkspace w("w"); - w.factory("Gaussian::g(x[0,-20,20],mu[0,-5,5],sigma[1,1,5])"); - w.factory("n[100,.1,2000]"); - w.factory("ExtendPdf::p(g,n)"); - - w.var("n")->setConstant(); - w.var("x")->setBins(301); - - RooDataHist* asimov = w.pdf("p")->generateBinned(*w.var("x"),ExpectedData()); - - RooFitResult* res = w.pdf("p")->fitTo(*asimov,Save(),SumW2Error(kTRUE)); - - asimov->Print(); - res->Print(); - TMatrixDSym cov = res->covarianceMatrix(); - cout << "variance = " << (cov.Determinant()) << endl; - cout << "stdev = " << sqrt(cov.Determinant()) << endl; - cov.Invert(); - cout << "jeffreys = " << sqrt(cov.Determinant()) << endl; - - w.defineSet("poi","mu,sigma"); - w.defineSet("obs","x"); - - RooJeffreysPrior pi("jeffreys","jeffreys",*w.pdf("p"),*w.set("poi"),*w.set("obs")); - pi.specialIntegratorConfig(kTRUE)->getConfigSection("RooIntegrator1D").setRealValue("maxSteps",3); - - const RooArgSet* temp = w.set("poi"); - pi.getParameters(*temp)->Print(); - // return; - - TCanvas* c1 = new TCanvas; - TH1* Jeff2d = pi.createHistogram("2dJeffreys",*w.var("mu"),Binning(10),YVar(*w.var("sigma"),Binning(10))); - Jeff2d->Draw("surf"); + // this one is VERY sensitive + // if the Gaussian is narrow ~ range(x)/nbins(x) then the peak isn't resolved + // and you get really bizzare shapes + // if the Gaussian is too wide range(x) ~ sigma then PDF gets renormalized + // and the PDF falls off too fast at high sigma + RooWorkspace w("w"); + w.factory("Gaussian::g(x[0,-20,20],mu[0,-5,5],sigma[1,1,5])"); + w.factory("n[100,.1,2000]"); + w.factory("ExtendPdf::p(g,n)"); + + w.var("n")->setConstant(); + w.var("x")->setBins(301); + + RooDataHist* asimov = w.pdf("p")->generateBinned(*w.var("x"),ExpectedData()); + + RooFitResult* res = w.pdf("p")->fitTo(*asimov,Save(),SumW2Error(kTRUE)); + + asimov->Print(); + res->Print(); + TMatrixDSym cov = res->covarianceMatrix(); + cout << "variance = " << (cov.Determinant()) << endl; + cout << "stdev = " << sqrt(cov.Determinant()) << 
endl; + cov.Invert(); + cout << "jeffreys = " << sqrt(cov.Determinant()) << endl; + + w.defineSet("poi","mu,sigma"); + w.defineSet("obs","x"); + + RooJeffreysPrior pi("jeffreys","jeffreys",*w.pdf("p"),*w.set("poi"),*w.set("obs")); + pi.specialIntegratorConfig(kTRUE)->getConfigSection("RooIntegrator1D").setRealValue("maxSteps",3); + + const RooArgSet* temp = w.set("poi"); + pi.getParameters(*temp)->Print(); + // return; + + TCanvas* c1 = new TCanvas; + TH1* Jeff2d = pi.createHistogram("2dJeffreys",*w.var("mu"),Binning(10),YVar(*w.var("sigma"),Binning(10))); + Jeff2d->Draw("surf"); } diff --git a/tutorials/roostats/MultivariateGaussianTest.C b/tutorials/roostats/MultivariateGaussianTest.C index 612f97175ca37..1b61666043d74 100644 --- a/tutorials/roostats/MultivariateGaussianTest.C +++ b/tutorials/roostats/MultivariateGaussianTest.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook /// Comparison of MCMC and PLC in a multi-variate gaussian problem /// /// This tutorial produces an N-dimensional multivariate Gaussian diff --git a/tutorials/roostats/OneSidedFrequentistUpperLimitWithBands.C b/tutorials/roostats/OneSidedFrequentistUpperLimitWithBands.C index 81f81cb27ba00..4b9e9ea3fa4f6 100644 --- a/tutorials/roostats/OneSidedFrequentistUpperLimitWithBands.C +++ b/tutorials/roostats/OneSidedFrequentistUpperLimitWithBands.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook /// OneSidedFrequentistUpperLimitWithBands /// /// This is a standard demo that can be used with any ROOT file diff --git a/tutorials/roostats/StandardBayesianMCMCDemo.C b/tutorials/roostats/StandardBayesianMCMCDemo.C index 18a3e3e087ff4..69bc1d47d3b40 100644 --- a/tutorials/roostats/StandardBayesianMCMCDemo.C +++ b/tutorials/roostats/StandardBayesianMCMCDemo.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// Standard demo of the Bayesian MCMC calculator /// /// This is a standard demo that can be used with any ROOT file @@ 
-64,141 +65,141 @@ void StandardBayesianMCMCDemo(const char* infile = "", const char* modelConfigName = "ModelConfig", const char* dataName = "obsData"){ - ///////////////////////////////////////////////////////////// - // First part is just to access a user-defined file - // or create the standard example file if it doesn't exist - //////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////// + // First part is just to access a user-defined file + // or create the standard example file if it doesn't exist + //////////////////////////////////////////////////////////// - const char* filename = ""; - if (!strcmp(infile,"")) { - filename = "results/example_combined_GaussExample_model.root"; - bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code - // if file does not exists generate with histfactory - if (!fileExist) { + const char* filename = ""; + if (!strcmp(infile,"")) { + filename = "results/example_combined_GaussExample_model.root"; + bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code + // if file does not exists generate with histfactory + if (!fileExist) { #ifdef _WIN32 - cout << "HistFactory file cannot be generated on Windows - exit" << endl; - return; + cout << "HistFactory file cannot be generated on Windows - exit" << endl; + return; #endif - // Normally this would be run on the command line - cout <<"will run standard hist2workspace example"<ProcessLine(".! prepareHistFactory ."); - gROOT->ProcessLine(".! hist2workspace config/example.xml"); - cout <<"\n\n---------------------"<ProcessLine(".! prepareHistFactory ."); + gROOT->ProcessLine(".! 
hist2workspace config/example.xml"); + cout <<"\n\n---------------------"<Get(workspaceName); + if(!w){ + cout <<"workspace not found" << endl; + return; } - else - filename = infile; - // Try to open the file - TFile *file = TFile::Open(filename); + // get the modelConfig out of the file + ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); - // if input file was specified byt not found, quit - if(!file ){ - cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl; + // get the modelConfig out of the file + RooAbsData* data = w->data(dataName); + + // make sure ingredients are found + if(!data || !mc){ + w->Print(); + cout << "data or ModelConfig was not found" <GetPdf()->fitTo(*data,Save()); + ProposalHelper ph; + ph.SetVariables((RooArgSet&)fit->floatParsFinal()); + ph.SetCovMatrix(fit->covarianceMatrix()); + ph.SetUpdateProposalParameters(kTRUE); // auto-create mean vars and add mappings + ph.SetCacheSize(100); + ProposalFunction* pf = ph.GetProposalFunction(); + */ + + // this proposal function seems fairly robust + SequentialProposal sp(0.1); + ///////////////////////////////////////////// + // create and use the MCMCCalculator + // to find and plot the 95% credible interval + // on the parameter of interest as specified + // in the model config + MCMCCalculator mcmc(*data,*mc); + mcmc.SetConfidenceLevel(optMCMC.confLevel); // 95% interval + // mcmc.SetProposalFunction(*pf); + mcmc.SetProposalFunction(sp); + mcmc.SetNumIters(optMCMC.numIters); // Metropolis-Hastings algorithm iterations + mcmc.SetNumBurnInSteps(optMCMC.numBurnInSteps); // first N steps to be ignored as burn-in + + // default is the shortest interval. 
+ if (optMCMC.intervalType == 0) mcmc.SetIntervalType(MCMCInterval::kShortest); // for shortest interval (not really needed) + if (optMCMC.intervalType == 1) mcmc.SetLeftSideTailFraction(0.5); // for central interval + if (optMCMC.intervalType == 2) mcmc.SetLeftSideTailFraction(0.); // for upper limit + + RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); + if (optMCMC.minPOI != -999) + firstPOI->setMin(optMCMC.minPOI); + if (optMCMC.maxPOI != -999) + firstPOI->setMax(optMCMC.maxPOI); + + MCMCInterval* interval = mcmc.GetInterval(); + + // make a plot + //TCanvas* c1 = + auto c1 = new TCanvas("IntervalPlot"); + MCMCIntervalPlot plot(*interval); + plot.Draw(); + + TCanvas* c2 = new TCanvas("extraPlots"); + const RooArgSet* list = mc->GetNuisanceParameters(); + if(list->getSize()>1){ + double n = list->getSize(); + int ny = TMath::CeilNint( sqrt(n) ); + int nx = TMath::CeilNint(double(n)/ny); + c2->Divide( nx,ny); + } + + // draw a scatter plot of chain results for poi vs each nuisance parameters + TIterator* it = mc->GetNuisanceParameters()->createIterator(); + RooRealVar* nuis = NULL; + int iPad=1; // iPad, that's funny + while( (nuis = (RooRealVar*) it->Next() )){ + c2->cd(iPad++); + plot.DrawChainScatter(*firstPOI,*nuis); + } + + // print out the iterval on the first Parameter of Interest + cout << "\n>>>> RESULT : " << optMCMC.confLevel*100 << "% interval on " <GetName()<<" is : ["<< + interval->LowerLimit(*firstPOI) << ", "<< + interval->UpperLimit(*firstPOI) <<"] "<Get(workspaceName); - if(!w){ - cout <<"workspace not found" << endl; - return; - } - - // get the modelConfig out of the file - ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); - - // get the modelConfig out of the file - RooAbsData* data = w->data(dataName); - - // make sure ingredients are found - if(!data || !mc){ - w->Print(); - cout << "data or ModelConfig was not found" <GetPdf()->fitTo(*data,Save()); - ProposalHelper ph; - 
ph.SetVariables((RooArgSet&)fit->floatParsFinal()); - ph.SetCovMatrix(fit->covarianceMatrix()); - ph.SetUpdateProposalParameters(kTRUE); // auto-create mean vars and add mappings - ph.SetCacheSize(100); - ProposalFunction* pf = ph.GetProposalFunction(); - */ - - // this proposal function seems fairly robust - SequentialProposal sp(0.1); - ///////////////////////////////////////////// - // create and use the MCMCCalculator - // to find and plot the 95% credible interval - // on the parameter of interest as specified - // in the model config - MCMCCalculator mcmc(*data,*mc); - mcmc.SetConfidenceLevel(optMCMC.confLevel); // 95% interval - // mcmc.SetProposalFunction(*pf); - mcmc.SetProposalFunction(sp); - mcmc.SetNumIters(optMCMC.numIters); // Metropolis-Hastings algorithm iterations - mcmc.SetNumBurnInSteps(optMCMC.numBurnInSteps); // first N steps to be ignored as burn-in - - // default is the shortest interval. - if (optMCMC.intervalType == 0) mcmc.SetIntervalType(MCMCInterval::kShortest); // for shortest interval (not really needed) - if (optMCMC.intervalType == 1) mcmc.SetLeftSideTailFraction(0.5); // for central interval - if (optMCMC.intervalType == 2) mcmc.SetLeftSideTailFraction(0.); // for upper limit - - RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); - if (optMCMC.minPOI != -999) - firstPOI->setMin(optMCMC.minPOI); - if (optMCMC.maxPOI != -999) - firstPOI->setMax(optMCMC.maxPOI); - - MCMCInterval* interval = mcmc.GetInterval(); - - // make a plot - //TCanvas* c1 = - auto c1 = new TCanvas("IntervalPlot"); - MCMCIntervalPlot plot(*interval); - plot.Draw(); - - TCanvas* c2 = new TCanvas("extraPlots"); - const RooArgSet* list = mc->GetNuisanceParameters(); - if(list->getSize()>1){ - double n = list->getSize(); - int ny = TMath::CeilNint( sqrt(n) ); - int nx = TMath::CeilNint(double(n)/ny); - c2->Divide( nx,ny); - } - - // draw a scatter plot of chain results for poi vs each nuisance parameters - TIterator* it = 
mc->GetNuisanceParameters()->createIterator(); - RooRealVar* nuis = NULL; - int iPad=1; // iPad, that's funny - while( (nuis = (RooRealVar*) it->Next() )){ - c2->cd(iPad++); - plot.DrawChainScatter(*firstPOI,*nuis); - } - - // print out the iterval on the first Parameter of Interest - cout << "\n>>>> RESULT : " << optMCMC.confLevel*100 << "% interval on " <GetName()<<" is : ["<< - interval->LowerLimit(*firstPOI) << ", "<< - interval->UpperLimit(*firstPOI) <<"] "<Get(workspaceName); - if(!w){ - cout <<"workspace not found" << endl; - return; - } - - // get the modelConfig out of the file - ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); - - // get the modelConfig out of the file - RooAbsData* data = w->data(dataName); - - // make sure ingredients are found - if(!data || !mc){ - w->Print(); - cout << "data or ModelConfig was not found" <GetParametersOfInterest()); - w->import(prior); - mc->SetPriorPdf(*w->pdf("prior")); - - // do without systematics - //mc->SetNuisanceParameters(RooArgSet() ); - if (nSigmaNuisance > 0) { - RooAbsPdf * pdf = mc->GetPdf(); - assert(pdf); - RooFitResult * res = pdf->fitTo(*data, Save(true), Minimizer(ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str()), Hesse(true), - PrintLevel(ROOT::Math::MinimizerOptions::DefaultPrintLevel()-1) ); - - res->Print(); - RooArgList nuisPar(*mc->GetNuisanceParameters()); - for (int i = 0; i < nuisPar.getSize(); ++i) { - RooRealVar * v = dynamic_cast (&nuisPar[i] ); - assert( v); - v->setMin( TMath::Max( v->getMin(), v->getVal() - nSigmaNuisance * v->getError() ) ); - v->setMax( TMath::Min( v->getMax(), v->getVal() + nSigmaNuisance * v->getError() ) ); - std::cout << "setting interval for nuisance " << v->GetName() << " : [ " << v->getMin() << " , " << v->getMax() << " ]" << std::endl; - } - } - - - BayesianCalculator bayesianCalc(*data,*mc); - bayesianCalc.SetConfidenceLevel(confLevel); // 95% interval - - // default of the calculator is central interval. 
here use shortest , central or upper limit depending on input - // doing a shortest interval might require a longer time since it requires a scan of the posterior function - if (intervalType == 0) bayesianCalc.SetShortestInterval(); // for shortest interval - if (intervalType == 1) bayesianCalc.SetLeftSideTailFraction(0.5); // for central interval - if (intervalType == 2) bayesianCalc.SetLeftSideTailFraction(0.); // for upper limit - - if (!integrationType.IsNull() ) { - bayesianCalc.SetIntegrationType(integrationType); // set integrationType - bayesianCalc.SetNumIters(nToys); // set number of ietrations (i.e. number of toys for MC integrations) - } - - // in case of toyMC make a nnuisance pdf - if (integrationType.Contains("TOYMC") ) { - RooAbsPdf * nuisPdf = RooStats::MakeNuisancePdf(*mc, "nuisance_pdf"); - cout << "using TOYMC integration: make nuisance pdf from the model " << std::endl; - nuisPdf->Print(); - bayesianCalc.ForceNuisancePdf(*nuisPdf); - scanPosterior = true; // for ToyMC the posterior is scanned anyway so used given points - } - - // compute interval by scanning the posterior function - if (scanPosterior) - bayesianCalc.SetScanOfPosterior(nScanPoints); - - RooRealVar* poi = (RooRealVar*) mc->GetParametersOfInterest()->first(); - if (maxPOI != -999 && maxPOI > poi->getMin()) - poi->setMax(maxPOI); - - - SimpleInterval* interval = bayesianCalc.GetInterval(); - - // print out the iterval on the first Parameter of Interest - cout << "\n>>>> RESULT : " << confLevel*100 << "% interval on " << poi->GetName()<<" is : ["<< - interval->LowerLimit() << ", "<< - interval->UpperLimit() <<"] "<Draw(); + ///////////////////////////////////////////////////////////// + // Tutorial starts here + //////////////////////////////////////////////////////////// + + // get the workspace out of the file + RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); + if(!w){ + cout <<"workspace not found" << endl; + return; + } + + // get the modelConfig out of the file + 
ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); + + // get the modelConfig out of the file + RooAbsData* data = w->data(dataName); + + // make sure ingredients are found + if(!data || !mc){ + w->Print(); + cout << "data or ModelConfig was not found" <GetParametersOfInterest()); + w->import(prior); + mc->SetPriorPdf(*w->pdf("prior")); + + // do without systematics + //mc->SetNuisanceParameters(RooArgSet() ); + if (nSigmaNuisance > 0) { + RooAbsPdf * pdf = mc->GetPdf(); + assert(pdf); + RooFitResult * res = pdf->fitTo(*data, Save(true), Minimizer(ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str()), Hesse(true), + PrintLevel(ROOT::Math::MinimizerOptions::DefaultPrintLevel()-1) ); + + res->Print(); + RooArgList nuisPar(*mc->GetNuisanceParameters()); + for (int i = 0; i < nuisPar.getSize(); ++i) { + RooRealVar * v = dynamic_cast (&nuisPar[i] ); + assert( v); + v->setMin( TMath::Max( v->getMin(), v->getVal() - nSigmaNuisance * v->getError() ) ); + v->setMax( TMath::Min( v->getMax(), v->getVal() + nSigmaNuisance * v->getError() ) ); + std::cout << "setting interval for nuisance " << v->GetName() << " : [ " << v->getMin() << " , " << v->getMax() << " ]" << std::endl; + } + } + + + BayesianCalculator bayesianCalc(*data,*mc); + bayesianCalc.SetConfidenceLevel(confLevel); // 95% interval + + // default of the calculator is central interval. here use shortest , central or upper limit depending on input + // doing a shortest interval might require a longer time since it requires a scan of the posterior function + if (intervalType == 0) bayesianCalc.SetShortestInterval(); // for shortest interval + if (intervalType == 1) bayesianCalc.SetLeftSideTailFraction(0.5); // for central interval + if (intervalType == 2) bayesianCalc.SetLeftSideTailFraction(0.); // for upper limit + + if (!integrationType.IsNull() ) { + bayesianCalc.SetIntegrationType(integrationType); // set integrationType + bayesianCalc.SetNumIters(nToys); // set number of ietrations (i.e. 
number of toys for MC integrations) + } + + // in case of toyMC make a nnuisance pdf + if (integrationType.Contains("TOYMC") ) { + RooAbsPdf * nuisPdf = RooStats::MakeNuisancePdf(*mc, "nuisance_pdf"); + cout << "using TOYMC integration: make nuisance pdf from the model " << std::endl; + nuisPdf->Print(); + bayesianCalc.ForceNuisancePdf(*nuisPdf); + scanPosterior = true; // for ToyMC the posterior is scanned anyway so used given points + } + + // compute interval by scanning the posterior function + if (scanPosterior) + bayesianCalc.SetScanOfPosterior(nScanPoints); + + RooRealVar* poi = (RooRealVar*) mc->GetParametersOfInterest()->first(); + if (maxPOI != -999 && maxPOI > poi->getMin()) + poi->setMax(maxPOI); + + + SimpleInterval* interval = bayesianCalc.GetInterval(); + + // print out the iterval on the first Parameter of Interest + cout << "\n>>>> RESULT : " << confLevel*100 << "% interval on " << poi->GetName()<<" is : ["<< + interval->LowerLimit() << ", "<< + interval->UpperLimit() <<"] "<Draw(); } diff --git a/tutorials/roostats/StandardFeldmanCousinsDemo.C b/tutorials/roostats/StandardFeldmanCousinsDemo.C index ddef894bd4578..f9a38aacb2c52 100644 --- a/tutorials/roostats/StandardFeldmanCousinsDemo.C +++ b/tutorials/roostats/StandardFeldmanCousinsDemo.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// Standard demo of the Feldman-Cousins calculator /// StandardFeldmanCousinsDemo /// @@ -51,133 +52,133 @@ void StandardFeldmanCousinsDemo(const char* infile = "", const char* modelConfigName = "ModelConfig", const char* dataName = "obsData"){ - ///////////////////////////////////////////////////////////// - // First part is just to access a user-defined file - // or create the standard example file if it doesn't exist - //////////////////////////////////////////////////////////// - const char* filename = ""; - if (!strcmp(infile,"")) { - filename = "results/example_combined_GaussExample_model.root"; - bool fileExist = 
!gSystem->AccessPathName(filename); // note opposite return code - // if file does not exists generate with histfactory - if (!fileExist) { + ///////////////////////////////////////////////////////////// + // First part is just to access a user-defined file + // or create the standard example file if it doesn't exist + //////////////////////////////////////////////////////////// + const char* filename = ""; + if (!strcmp(infile,"")) { + filename = "results/example_combined_GaussExample_model.root"; + bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code + // if file does not exists generate with histfactory + if (!fileExist) { #ifdef _WIN32 - cout << "HistFactory file cannot be generated on Windows - exit" << endl; - return; + cout << "HistFactory file cannot be generated on Windows - exit" << endl; + return; #endif - // Normally this would be run on the command line - cout <<"will run standard hist2workspace example"<ProcessLine(".! prepareHistFactory ."); - gROOT->ProcessLine(".! 
hist2workspace config/example.xml"); - cout <<"\n\n---------------------"<Get(workspaceName); - if(!w){ - cout <<"workspace not found" << endl; - return; - } - - // get the modelConfig out of the file - ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); - - // get the modelConfig out of the file - RooAbsData* data = w->data(dataName); - - // make sure ingredients are found - if(!data || !mc){ - w->Print(); - cout << "data or ModelConfig was not found" <GetPdf()->canBeExtended()){ - if(data->numEntries()==1) - fc.FluctuateNumDataEntries(false); - else - cout <<"Not sure what to do about this model" <SetProofConfig(&pc); // enable proof - - - // Now get the interval - PointSetInterval* interval = fc.GetInterval(); - ConfidenceBelt* belt = fc.GetConfidenceBelt(); - - // print out the iterval on the first Parameter of Interest - RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); - cout << "\n95% interval on " <GetName()<<" is : ["<< - interval->LowerLimit(*firstPOI) << ", "<< - interval->UpperLimit(*firstPOI) <<"] "<numEntries(), - firstPOI->getMin(), - firstPOI->getMax()); - - // loop through the points that were tested and ask confidence belt - // what the upper/lower thresholds were. - // For FeldmanCousins, the lower cut off is always 0 - for(Int_t i=0; inumEntries(); ++i){ - tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); - double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); - double arMin = belt->GetAcceptanceRegionMax(*tmpPoint); - double poiVal = tmpPoint->getRealValue(firstPOI->GetName()) ; - histOfThresholds->Fill(poiVal,arMax); - } - histOfThresholds->SetMinimum(0); - histOfThresholds->Draw(); + // Normally this would be run on the command line + cout <<"will run standard hist2workspace example"<ProcessLine(".! prepareHistFactory ."); + gROOT->ProcessLine(".! 
hist2workspace config/example.xml"); + cout <<"\n\n---------------------"<Get(workspaceName); + if(!w){ + cout <<"workspace not found" << endl; + return; + } + + // get the modelConfig out of the file + ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); + + // get the modelConfig out of the file + RooAbsData* data = w->data(dataName); + + // make sure ingredients are found + if(!data || !mc){ + w->Print(); + cout << "data or ModelConfig was not found" <GetPdf()->canBeExtended()){ + if(data->numEntries()==1) + fc.FluctuateNumDataEntries(false); + else + cout <<"Not sure what to do about this model" <SetProofConfig(&pc); // enable proof + + + // Now get the interval + PointSetInterval* interval = fc.GetInterval(); + ConfidenceBelt* belt = fc.GetConfidenceBelt(); + + // print out the iterval on the first Parameter of Interest + RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); + cout << "\n95% interval on " <GetName()<<" is : ["<< + interval->LowerLimit(*firstPOI) << ", "<< + interval->UpperLimit(*firstPOI) <<"] "<numEntries(), + firstPOI->getMin(), + firstPOI->getMax()); + + // loop through the points that were tested and ask confidence belt + // what the upper/lower thresholds were. 
+ // For FeldmanCousins, the lower cut off is always 0 + for(Int_t i=0; inumEntries(); ++i){ + tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); + double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); + double arMin = belt->GetAcceptanceRegionMax(*tmpPoint); + double poiVal = tmpPoint->getRealValue(firstPOI->GetName()) ; + histOfThresholds->Fill(poiVal,arMax); + } + histOfThresholds->SetMinimum(0); + histOfThresholds->Draw(); } diff --git a/tutorials/roostats/StandardFrequentistDiscovery.C b/tutorials/roostats/StandardFrequentistDiscovery.C index 663c4e22b3571..63f4f98ec9f48 100644 --- a/tutorials/roostats/StandardFrequentistDiscovery.C +++ b/tutorials/roostats/StandardFrequentistDiscovery.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook /// StandardFrequentistDiscovery /// /// This is a standard demo that can be used with any ROOT file @@ -52,8 +53,6 @@ using namespace RooFit; using namespace RooStats; - - double StandardFrequentistDiscovery( const char* infile = "", const char* workspaceName = "channel1", diff --git a/tutorials/roostats/StandardHistFactoryPlotsWithCategories.C b/tutorials/roostats/StandardHistFactoryPlotsWithCategories.C index e0b82e09d96db..700c62c2e753d 100644 --- a/tutorials/roostats/StandardHistFactoryPlotsWithCategories.C +++ b/tutorials/roostats/StandardHistFactoryPlotsWithCategories.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// StandardHistFactoryPlotsWithCategories /// /// This is a standard demo that can be used with any ROOT file diff --git a/tutorials/roostats/StandardHypoTestDemo.C b/tutorials/roostats/StandardHypoTestDemo.C index b3afedd689544..16a5c52a8d099 100644 --- a/tutorials/roostats/StandardHypoTestDemo.C +++ b/tutorials/roostats/StandardHypoTestDemo.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook /// Standard tutorial macro for hypothesis test (for computing the discovery significance) using all /// RooStats hypotheiss tests 
calculators and test statistics. /// diff --git a/tutorials/roostats/StandardHypoTestInvDemo.C b/tutorials/roostats/StandardHypoTestInvDemo.C index 4db4e403983b8..9350f65754f72 100644 --- a/tutorials/roostats/StandardHypoTestInvDemo.C +++ b/tutorials/roostats/StandardHypoTestInvDemo.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook /// Standard tutorial macro for performing an inverted hypothesis test for computing an interval /// /// This macro will perform a scan of the p-values for computing the interval or limit diff --git a/tutorials/roostats/StandardProfileInspectorDemo.C b/tutorials/roostats/StandardProfileInspectorDemo.C index 8070a7d46b8b4..24ee95c97eb4e 100644 --- a/tutorials/roostats/StandardProfileInspectorDemo.C +++ b/tutorials/roostats/StandardProfileInspectorDemo.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// Standard demo of the ProfileInspector class /// StandardProfileInspectorDemo /// @@ -47,87 +48,87 @@ void StandardProfileInspectorDemo(const char* infile = "", const char* modelConfigName = "ModelConfig", const char* dataName = "obsData") { - ///////////////////////////////////////////////////////////// - // First part is just to access a user-defined file - // or create the standard example file if it doesn't exist - //////////////////////////////////////////////////////////// - - const char* filename = ""; - if (!strcmp(infile,"")) { - filename = "results/example_combined_GaussExample_model.root"; - bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code - // if file does not exists generate with histfactory - if (!fileExist) { + ///////////////////////////////////////////////////////////// + // First part is just to access a user-defined file + // or create the standard example file if it doesn't exist + //////////////////////////////////////////////////////////// + + const char* filename = ""; + if (!strcmp(infile,"")) { + filename = 
"results/example_combined_GaussExample_model.root"; + bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code + // if file does not exists generate with histfactory + if (!fileExist) { #ifdef _WIN32 - cout << "HistFactory file cannot be generated on Windows - exit" << endl; - return; + cout << "HistFactory file cannot be generated on Windows - exit" << endl; + return; #endif - // Normally this would be run on the command line - cout <<"will run standard hist2workspace example"<ProcessLine(".! prepareHistFactory ."); - gROOT->ProcessLine(".! hist2workspace config/example.xml"); - cout <<"\n\n---------------------"<ProcessLine(".! prepareHistFactory ."); + gROOT->ProcessLine(".! hist2workspace config/example.xml"); + cout <<"\n\n---------------------"<Get(workspaceName); + if(!w){ + cout <<"workspace not found" << endl; + return; } - else - filename = infile; - // Try to open the file - TFile *file = TFile::Open(filename); + // get the modelConfig out of the file + ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); - // if input file was specified byt not found, quit - if(!file ){ - cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl; + // get the modelConfig out of the file + RooAbsData* data = w->data(dataName); + + // make sure ingredients are found + if(!data || !mc){ + w->Print(); + cout << "data or ModelConfig was not found" <GetSize()>4){ + double n = list->GetSize(); + int nx = (int)sqrt(n) ; + int ny = TMath::CeilNint(n/nx); + nx = TMath::CeilNint( sqrt(n) ); + c1->Divide(ny,nx); + } else + c1->Divide(list->GetSize()); + for(int i=0; iGetSize(); ++i){ + c1->cd(i+1); + list->At(i)->Draw("al"); + } - ///////////////////////////////////////////////////////////// - // Tutorial starts here - //////////////////////////////////////////////////////////// - - // get the workspace out of the file - RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); - if(!w){ - cout <<"workspace not found" << 
endl; - return; - } - - // get the modelConfig out of the file - ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); - - // get the modelConfig out of the file - RooAbsData* data = w->data(dataName); - - // make sure ingredients are found - if(!data || !mc){ - w->Print(); - cout << "data or ModelConfig was not found" <GetSize()>4){ - double n = list->GetSize(); - int nx = (int)sqrt(n) ; - int ny = TMath::CeilNint(n/nx); - nx = TMath::CeilNint( sqrt(n) ); - c1->Divide(ny,nx); - } else - c1->Divide(list->GetSize()); - for(int i=0; iGetSize(); ++i){ - c1->cd(i+1); - list->At(i)->Draw("al"); - } - - cout << endl; + cout << endl; } diff --git a/tutorials/roostats/StandardProfileLikelihoodDemo.C b/tutorials/roostats/StandardProfileLikelihoodDemo.C index 974c241f52079..8ccb86020c29d 100644 --- a/tutorials/roostats/StandardProfileLikelihoodDemo.C +++ b/tutorials/roostats/StandardProfileLikelihoodDemo.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook /// Standard demo of the Profile Likelihood calculator /// StandardProfileLikelihoodDemo /// @@ -60,111 +61,111 @@ void StandardProfileLikelihoodDemo(const char* infile = "", const char* modelConfigName = "ModelConfig", const char* dataName = "obsData"){ - double confLevel = optPL.confLevel; - double nScanPoints = optPL.nScanPoints; - bool plotAsTF1 = optPL.plotAsTF1; - double poiXMin = optPL.poiMinPlot; - double poiXMax = optPL.poiMaxPlot; - bool doHypoTest = optPL.doHypoTest; - double nullParamValue = optPL.nullValue; - - ///////////////////////////////////////////////////////////// - // First part is just to access a user-defined file - // or create the standard example file if it doesn't exist - //////////////////////////////////////////////////////////// - const char* filename = ""; - if (!strcmp(infile,"")) { - filename = "results/example_combined_GaussExample_model.root"; - bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code - // if file does not exists generate 
with histfactory - if (!fileExist) { + double confLevel = optPL.confLevel; + double nScanPoints = optPL.nScanPoints; + bool plotAsTF1 = optPL.plotAsTF1; + double poiXMin = optPL.poiMinPlot; + double poiXMax = optPL.poiMaxPlot; + bool doHypoTest = optPL.doHypoTest; + double nullParamValue = optPL.nullValue; + + ///////////////////////////////////////////////////////////// + // First part is just to access a user-defined file + // or create the standard example file if it doesn't exist + //////////////////////////////////////////////////////////// + const char* filename = ""; + if (!strcmp(infile,"")) { + filename = "results/example_combined_GaussExample_model.root"; + bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code + // if file does not exists generate with histfactory + if (!fileExist) { #ifdef _WIN32 - cout << "HistFactory file cannot be generated on Windows - exit" << endl; - return; + cout << "HistFactory file cannot be generated on Windows - exit" << endl; + return; #endif - // Normally this would be run on the command line - cout <<"will run standard hist2workspace example"<ProcessLine(".! prepareHistFactory ."); - gROOT->ProcessLine(".! hist2workspace config/example.xml"); - cout <<"\n\n---------------------"<ProcessLine(".! prepareHistFactory ."); + gROOT->ProcessLine(".! 
hist2workspace config/example.xml"); + cout <<"\n\n---------------------"<Get(workspaceName); + if(!w){ + cout <<"workspace not found" << endl; + return; } - else - filename = infile; - // Try to open the file - TFile *file = TFile::Open(filename); + // get the modelConfig out of the file + ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); - // if input file was specified byt not found, quit - if(!file ){ - cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl; + // get the modelConfig out of the file + RooAbsData* data = w->data(dataName); + + // make sure ingredients are found + if(!data || !mc){ + w->Print(); + cout << "data or ModelConfig was not found" <Get(workspaceName); - if(!w){ - cout <<"workspace not found" << endl; - return; - } - - // get the modelConfig out of the file - ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); - - // get the modelConfig out of the file - RooAbsData* data = w->data(dataName); - - // make sure ingredients are found - if(!data || !mc){ - w->Print(); - cout << "data or ModelConfig was not found" <GetParametersOfInterest()->first(); - cout << "\n>>>> RESULT : " << confLevel*100 << "% interval on " <GetName()<<" is : ["<< - interval->LowerLimit(*firstPOI) << ", "<< - interval->UpperLimit(*firstPOI) <<"]\n "<GetName(), nullParamValue); - pl.SetNullParameters(nullparams); - std::cout << "Perform Test of Hypothesis : null Hypothesis is " << firstPOI->GetName() << nullParamValue << std::endl; - auto result = pl.GetHypoTest(); - std::cout << "\n>>>> Hypotheis Test Result \n"; - result->Print(); - } + ///////////////////////////////////////////// + // create and use the ProfileLikelihoodCalculator + // to find and plot the 95% confidence interval + // on the parameter of interest as specified + // in the model config + ProfileLikelihoodCalculator pl(*data,*mc); + pl.SetConfidenceLevel(confLevel); // 95% interval + LikelihoodInterval* interval = pl.GetInterval(); + + // print out the 
iterval on the first Parameter of Interest + RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); + cout << "\n>>>> RESULT : " << confLevel*100 << "% interval on " <GetName()<<" is : ["<< + interval->LowerLimit(*firstPOI) << ", "<< + interval->UpperLimit(*firstPOI) <<"]\n "<GetName(), nullParamValue); + pl.SetNullParameters(nullparams); + std::cout << "Perform Test of Hypothesis : null Hypothesis is " << firstPOI->GetName() << nullParamValue << std::endl; + auto result = pl.GetHypoTest(); + std::cout << "\n>>>> Hypotheis Test Result \n"; + result->Print(); + } } diff --git a/tutorials/roostats/StandardTestStatDistributionDemo.C b/tutorials/roostats/StandardTestStatDistributionDemo.C index f22cac89916fc..5a1260ce4d49f 100644 --- a/tutorials/roostats/StandardTestStatDistributionDemo.C +++ b/tutorials/roostats/StandardTestStatDistributionDemo.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook /// StandardTestStatDistributionDemo.C /// /// This simple script plots the sampling distribution of the profile likelihood @@ -60,140 +61,140 @@ void StandardTestStatDistributionDemo(const char* infile = "", const char* dataName = "obsData"){ - // the number of toy MC used to generate the distribution - int nToyMC = 1000; - // The parameter below is needed for asymptotic distribution to be chi-square, - // but set to false if your model is not numerically stable if mu<0 - bool allowNegativeMu=true; + // the number of toy MC used to generate the distribution + int nToyMC = 1000; + // The parameter below is needed for asymptotic distribution to be chi-square, + // but set to false if your model is not numerically stable if mu<0 + bool allowNegativeMu=true; - ///////////////////////////////////////////////////////////// - // First part is just to access a user-defined file - // or create the standard example file if it doesn't exist - //////////////////////////////////////////////////////////// - const char* filename = ""; - if 
(!strcmp(infile,"")) { - filename = "results/example_combined_GaussExample_model.root"; - bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code - // if file does not exists generate with histfactory - if (!fileExist) { + ///////////////////////////////////////////////////////////// + // First part is just to access a user-defined file + // or create the standard example file if it doesn't exist + //////////////////////////////////////////////////////////// + const char* filename = ""; + if (!strcmp(infile,"")) { + filename = "results/example_combined_GaussExample_model.root"; + bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code + // if file does not exists generate with histfactory + if (!fileExist) { #ifdef _WIN32 - cout << "HistFactory file cannot be generated on Windows - exit" << endl; - return; + cout << "HistFactory file cannot be generated on Windows - exit" << endl; + return; #endif - // Normally this would be run on the command line - cout <<"will run standard hist2workspace example"<ProcessLine(".! prepareHistFactory ."); - gROOT->ProcessLine(".! hist2workspace config/example.xml"); - cout <<"\n\n---------------------"<ProcessLine(".! prepareHistFactory ."); + gROOT->ProcessLine(".! 
hist2workspace config/example.xml"); + cout <<"\n\n---------------------"<Get(workspaceName); + if(!w){ + cout <<"workspace not found" << endl; + return; } - else - filename = infile; - // Try to open the file - TFile *file = TFile::Open(filename); + // get the modelConfig out of the file + ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); + + // get the modelConfig out of the file + RooAbsData* data = w->data(dataName); - // if input file was specified byt not found, quit - if(!file ){ - cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl; + // make sure ingredients are found + if(!data || !mc){ + w->Print(); + cout << "data or ModelConfig was not found" <Print(); + ///////////////////////////////////////////////////////////// + // Now find the upper limit based on the asymptotic results + //////////////////////////////////////////////////////////// + RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); + ProfileLikelihoodCalculator plc(*data,*mc); + LikelihoodInterval* interval = plc.GetInterval(); + double plcUpperLimit = interval->UpperLimit(*firstPOI); + delete interval; + cout << "\n\n--------------------------------------"<GetName() << " = " << plcUpperLimit <GetParametersOfInterest()->getSize(); + if(nPOI>1){ + cout <<"not sure what to do with other parameters of interest, but here are their values"<GetParametersOfInterest()->Print("v"); + } + + ///////////////////////////////////////////// + // create thte test stat sampler + ProfileLikelihoodTestStat ts(*mc->GetPdf()); + + // to avoid effects from boundary and simplify asymptotic comparison, set min=-max + if(allowNegativeMu) + firstPOI->setMin(-1*firstPOI->getMax()); + + // temporary RooArgSet + RooArgSet poi; + poi.add(*mc->GetParametersOfInterest()); + + // create and configure the ToyMCSampler + ToyMCSampler sampler(ts,nToyMC); + sampler.SetPdf(*mc->GetPdf()); + sampler.SetObservables(*mc->GetObservables()); + 
sampler.SetGlobalObservables(*mc->GetGlobalObservables()); + if(!mc->GetPdf()->canBeExtended() && (data->numEntries()==1)){ + cout << "tell it to use 1 event"<setVal(plcUpperLimit); // set POI value for generation + sampler.SetParametersForTestStat(*mc->GetParametersOfInterest()); // set POI value for evaluation + + if (useProof) { + ProofConfig pc(*w, nworkers, "",false); + sampler.SetProofConfig(&pc); // enable proof + } - ///////////////////////////////////////////////////////////// - // Now get the data and workspace - //////////////////////////////////////////////////////////// - - // get the workspace out of the file - RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); - if(!w){ - cout <<"workspace not found" << endl; - return; - } - - // get the modelConfig out of the file - ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); - - // get the modelConfig out of the file - RooAbsData* data = w->data(dataName); - - // make sure ingredients are found - if(!data || !mc){ - w->Print(); - cout << "data or ModelConfig was not found" <Print(); - ///////////////////////////////////////////////////////////// - // Now find the upper limit based on the asymptotic results - //////////////////////////////////////////////////////////// - RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); - ProfileLikelihoodCalculator plc(*data,*mc); - LikelihoodInterval* interval = plc.GetInterval(); - double plcUpperLimit = interval->UpperLimit(*firstPOI); - delete interval; - cout << "\n\n--------------------------------------"<GetName() << " = " << plcUpperLimit <GetParametersOfInterest()->getSize(); - if(nPOI>1){ - cout <<"not sure what to do with other parameters of interest, but here are their values"<GetParametersOfInterest()->Print("v"); - } - - ///////////////////////////////////////////// - // create thte test stat sampler - ProfileLikelihoodTestStat ts(*mc->GetPdf()); - - // to avoid effects from boundary and simplify asymptotic comparison, 
set min=-max - if(allowNegativeMu) - firstPOI->setMin(-1*firstPOI->getMax()); - - // temporary RooArgSet - RooArgSet poi; - poi.add(*mc->GetParametersOfInterest()); - - // create and configure the ToyMCSampler - ToyMCSampler sampler(ts,nToyMC); - sampler.SetPdf(*mc->GetPdf()); - sampler.SetObservables(*mc->GetObservables()); - sampler.SetGlobalObservables(*mc->GetGlobalObservables()); - if(!mc->GetPdf()->canBeExtended() && (data->numEntries()==1)){ - cout << "tell it to use 1 event"<setVal(plcUpperLimit); // set POI value for generation - sampler.SetParametersForTestStat(*mc->GetParametersOfInterest()); // set POI value for evaluation - - if (useProof) { - ProofConfig pc(*w, nworkers, "",false); - sampler.SetProofConfig(&pc); // enable proof - } - - firstPOI->setVal(plcUpperLimit); - RooArgSet allParameters; - allParameters.add(*mc->GetParametersOfInterest()); - allParameters.add(*mc->GetNuisanceParameters()); - allParameters.Print("v"); - - SamplingDistribution* sampDist = sampler.GetSamplingDistribution(allParameters); - SamplingDistPlot plot; - plot.AddSamplingDistribution(sampDist); - plot.GetTH1F(sampDist)->GetYaxis()->SetTitle(Form("f(-log #lambda(#mu=%.2f) | #mu=%.2f)",plcUpperLimit,plcUpperLimit)); - plot.SetAxisTitle(Form("-log #lambda(#mu=%.2f)",plcUpperLimit)); - - TCanvas* c1 = new TCanvas("c1"); - c1->SetLogy(); - plot.Draw(); - double min = plot.GetTH1F(sampDist)->GetXaxis()->GetXmin(); - double max = plot.GetTH1F(sampDist)->GetXaxis()->GetXmax(); - - TF1* f = new TF1("f",Form("2*ROOT::Math::chisquared_pdf(2*x,%d,0)",nPOI),min,max); - f->Draw("same"); - c1->SaveAs("standard_test_stat_distribution.pdf"); + firstPOI->setVal(plcUpperLimit); + RooArgSet allParameters; + allParameters.add(*mc->GetParametersOfInterest()); + allParameters.add(*mc->GetNuisanceParameters()); + allParameters.Print("v"); + + SamplingDistribution* sampDist = sampler.GetSamplingDistribution(allParameters); + SamplingDistPlot plot; + plot.AddSamplingDistribution(sampDist); + 
plot.GetTH1F(sampDist)->GetYaxis()->SetTitle(Form("f(-log #lambda(#mu=%.2f) | #mu=%.2f)",plcUpperLimit,plcUpperLimit)); + plot.SetAxisTitle(Form("-log #lambda(#mu=%.2f)",plcUpperLimit)); + + TCanvas* c1 = new TCanvas("c1"); + c1->SetLogy(); + plot.Draw(); + double min = plot.GetTH1F(sampDist)->GetXaxis()->GetXmin(); + double max = plot.GetTH1F(sampDist)->GetXaxis()->GetXmax(); + + TF1* f = new TF1("f",Form("2*ROOT::Math::chisquared_pdf(2*x,%d,0)",nPOI),min,max); + f->Draw("same"); + c1->SaveAs("standard_test_stat_distribution.pdf"); } diff --git a/tutorials/roostats/TestNonCentral.C b/tutorials/roostats/TestNonCentral.C index 2b0ce22936c8a..908c25176898b 100644 --- a/tutorials/roostats/TestNonCentral.C +++ b/tutorials/roostats/TestNonCentral.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// /// \macro_image /// \macro_output @@ -10,29 +11,29 @@ using namespace RooFit; void TestNonCentral(){ - RooWorkspace w("w"); - // k <2, must use sum - w.factory("NonCentralChiSquare::nc(x[0,50],k[1.99,0,5],lambda[5])"); - // kk > 2 can use bessel - w.factory("NonCentralChiSquare::ncc(x,kk[2.01,0,5],lambda)"); - // kk > 2, force sum - w.factory("NonCentralChiSquare::nccc(x,kk,lambda)"); - ((RooNonCentralChiSquare*)w.pdf("nccc"))->SetForceSum(true); + RooWorkspace w("w"); + // k <2, must use sum + w.factory("NonCentralChiSquare::nc(x[0,50],k[1.99,0,5],lambda[5])"); + // kk > 2 can use bessel + w.factory("NonCentralChiSquare::ncc(x,kk[2.01,0,5],lambda)"); + // kk > 2, force sum + w.factory("NonCentralChiSquare::nccc(x,kk,lambda)"); + ((RooNonCentralChiSquare*)w.pdf("nccc"))->SetForceSum(true); - // a normal "central" chi-square for comparision when lambda->0 - w.factory("ChiSquarePdf::cs(x,k)"); + // a normal "central" chi-square for comparision when lambda->0 + w.factory("ChiSquarePdf::cs(x,k)"); - //w.var("kk")->setVal(4.); // test a large kk + //w.var("kk")->setVal(4.); // test a large kk - RooDataSet* ncdata = 
w.pdf("nc")->generate(*w.var("x"),100); - RooDataSet* csdata = w.pdf("cs")->generate(*w.var("x"),100); - RooPlot* plot = w.var("x")->frame(); - ncdata->plotOn(plot,MarkerColor(kRed)); - csdata->plotOn(plot,MarkerColor(kBlue)); - w.pdf("nc")->plotOn(plot,LineColor(kRed)); - w.pdf("ncc")->plotOn(plot,LineColor(kGreen)); - w.pdf("nccc")->plotOn(plot,LineColor(kYellow),LineStyle(kDashed)); - w.pdf("cs")->plotOn(plot,LineColor(kBlue),LineStyle(kDotted)); - plot->Draw(); + RooDataSet* ncdata = w.pdf("nc")->generate(*w.var("x"),100); + RooDataSet* csdata = w.pdf("cs")->generate(*w.var("x"),100); + RooPlot* plot = w.var("x")->frame(); + ncdata->plotOn(plot,MarkerColor(kRed)); + csdata->plotOn(plot,MarkerColor(kBlue)); + w.pdf("nc")->plotOn(plot,LineColor(kRed)); + w.pdf("ncc")->plotOn(plot,LineColor(kGreen)); + w.pdf("nccc")->plotOn(plot,LineColor(kYellow),LineStyle(kDashed)); + w.pdf("cs")->plotOn(plot,LineColor(kBlue),LineStyle(kDotted)); + plot->Draw(); } diff --git a/tutorials/roostats/TwoSidedFrequentistUpperLimitWithBands.C b/tutorials/roostats/TwoSidedFrequentistUpperLimitWithBands.C index 1be0aa6270269..6e61f71c28a1c 100644 --- a/tutorials/roostats/TwoSidedFrequentistUpperLimitWithBands.C +++ b/tutorials/roostats/TwoSidedFrequentistUpperLimitWithBands.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// TwoSidedFrequentistUpperLimitWithBands /// /// @@ -133,343 +134,343 @@ void TwoSidedFrequentistUpperLimitWithBands(const char* infile = "", const char* dataName = "obsData") { - double confidenceLevel=0.95; - // degrade/improve number of pseudo-experiments used to define the confidence belt. 
- // value of 1 corresponds to default number of toys in the tail, which is 50/(1-confidenceLevel) - double additionalToysFac = 0.5; - int nPointsToScan = 20; // number of steps in the parameter of interest - int nToyMC = 200; // number of toys used to define the expected limit and band - - ///////////////////////////////////////////////////////////// - // First part is just to access a user-defined file - // or create the standard example file if it doesn't exist - //////////////////////////////////////////////////////////// - const char* filename = ""; - if (!strcmp(infile,"")) { - filename = "results/example_combined_GaussExample_model.root"; - bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code - // if file does not exists generate with histfactory - if (!fileExist) { + double confidenceLevel=0.95; + // degrade/improve number of pseudo-experiments used to define the confidence belt. + // value of 1 corresponds to default number of toys in the tail, which is 50/(1-confidenceLevel) + double additionalToysFac = 0.5; + int nPointsToScan = 20; // number of steps in the parameter of interest + int nToyMC = 200; // number of toys used to define the expected limit and band + + ///////////////////////////////////////////////////////////// + // First part is just to access a user-defined file + // or create the standard example file if it doesn't exist + //////////////////////////////////////////////////////////// + const char* filename = ""; + if (!strcmp(infile,"")) { + filename = "results/example_combined_GaussExample_model.root"; + bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code + // if file does not exists generate with histfactory + if (!fileExist) { #ifdef _WIN32 - cout << "HistFactory file cannot be generated on Windows - exit" << endl; - return; + cout << "HistFactory file cannot be generated on Windows - exit" << endl; + return; #endif - // Normally this would be run on the command line - cout 
<<"will run standard hist2workspace example"<ProcessLine(".! prepareHistFactory ."); - gROOT->ProcessLine(".! hist2workspace config/example.xml"); - cout <<"\n\n---------------------"<ProcessLine(".! prepareHistFactory ."); + gROOT->ProcessLine(".! hist2workspace config/example.xml"); + cout <<"\n\n---------------------"<Get(workspaceName); + if(!w){ + cout <<"workspace not found" << endl; + return; } - else - filename = infile; - // Try to open the file - TFile *file = TFile::Open(filename); + // get the modelConfig out of the file + ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); + + // get the modelConfig out of the file + RooAbsData* data = w->data(dataName); - // if input file was specified byt not found, quit - if(!file ){ - cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl; + // make sure ingredients are found + if(!data || !mc){ + w->Print(); + cout << "data or ModelConfig was not found" <Get(workspaceName); - if(!w){ - cout <<"workspace not found" << endl; - return; - } - - // get the modelConfig out of the file - ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); - - // get the modelConfig out of the file - RooAbsData* data = w->data(dataName); - - // make sure ingredients are found - if(!data || !mc){ - w->Print(); - cout << "data or ModelConfig was not found" <Print(); - - ///////////////////////////////////////////////////////////// - // Now get the POI for convenience - // you may want to adjust the range of your POI - //////////////////////////////////////////////////////////// - RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); - // firstPOI->setMin(0); - // firstPOI->setMax(10); - - ///////////////////////////////////////////// - // create and use the FeldmanCousins tool - // to find and plot the 95% confidence interval - // on the parameter of interest as specified - // in the model config - // REMEMBER, we will change the test statistic - // so this is NOT a 
Feldman-Cousins interval - FeldmanCousins fc(*data,*mc); - fc.SetConfidenceLevel(confidenceLevel); - fc.AdditionalNToysFactor(additionalToysFac); // improve sampling that defines confidence belt - // fc.UseAdaptiveSampling(true); // speed it up a bit, but don't use for expectd limits - fc.SetNBins(nPointsToScan); // set how many points per parameter of interest to scan - fc.CreateConfBelt(true); // save the information in the belt for plotting - - ///////////////////////////////////////////// - // Feldman-Cousins is a unified limit by definition - // but the tool takes care of a few things for us like which values - // of the nuisance parameters should be used to generate toys. - // so let's just change the test statistic and realize this is - // no longer "Feldman-Cousins" but is a fully frequentist Neyman-Construction. - // fc.GetTestStatSampler()->SetTestStatistic(&onesided); - // ((ToyMCSampler*) fc.GetTestStatSampler())->SetGenerateBinned(true); - ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); - ProfileLikelihoodTestStat* testStat = dynamic_cast(toymcsampler->GetTestStatistic()); - - // Since this tool needs to throw toy MC the PDF needs to be - // extended or the tool needs to know how many entries in a dataset - // per pseudo experiment. - // In the 'number counting form' where the entries in the dataset - // are counts, and not values of discriminating variables, the - // datasets typically only have one entry and the PDF is not - // extended. 
- if(!mc->GetPdf()->canBeExtended()){ - if(data->numEntries()==1) - fc.FluctuateNumDataEntries(false); - else - cout <<"Not sure what to do about this model" <SetProofConfig(&pc); // enable proof - } - - if(mc->GetGlobalObservables()){ - cout << "will use global observables for unconditional ensemble"<GetGlobalObservables()->Print(); - toymcsampler->SetGlobalObservables(*mc->GetGlobalObservables()); - } - - - // Now get the interval - PointSetInterval* interval = fc.GetInterval(); - ConfidenceBelt* belt = fc.GetConfidenceBelt(); - - // print out the iterval on the first Parameter of Interest - cout << "\n95% interval on " <GetName()<<" is : ["<< - interval->LowerLimit(*firstPOI) << ", "<< - interval->UpperLimit(*firstPOI) <<"] "<UpperLimit(*firstPOI); - firstPOI->setVal(observedUL); - double obsTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*data,tmpPOI); - - - // Ask the calculator which points were scanned - RooDataSet* parameterScan = (RooDataSet*) fc.GetPointsToScan(); - RooArgSet* tmpPoint; - - // make a histogram of parameter vs. threshold - TH1F* histOfThresholds = new TH1F("histOfThresholds","", - parameterScan->numEntries(), - firstPOI->getMin(), - firstPOI->getMax()); - histOfThresholds->GetXaxis()->SetTitle(firstPOI->GetName()); - histOfThresholds->GetYaxis()->SetTitle("Threshold"); - - // loop through the points that were tested and ask confidence belt - // what the upper/lower thresholds were. 
- // For FeldmanCousins, the lower cut off is always 0 - for(Int_t i=0; inumEntries(); ++i){ - tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); - //cout <<"get threshold"<GetAcceptanceRegionMax(*tmpPoint); - double poiVal = tmpPoint->getRealValue(firstPOI->GetName()) ; - histOfThresholds->Fill(poiVal,arMax); - } - TCanvas* c1 = new TCanvas(); - c1->Divide(2); - c1->cd(1); - histOfThresholds->SetMinimum(0); - histOfThresholds->Draw(); - c1->cd(2); - - ///////////////////////////////////////////////////////////// - // Now we generate the expected bands and power-constriant - //////////////////////////////////////////////////////////// - - // First: find parameter point for mu=0, with conditional MLEs for nuisance parameters - RooAbsReal* nll = mc->GetPdf()->createNLL(*data); - RooAbsReal* profile = nll->createProfile(*mc->GetParametersOfInterest()); - firstPOI->setVal(0.); - profile->getVal(); // this will do fit and set nuisance parameters to profiled values - RooArgSet* poiAndNuisance = new RooArgSet(); - if(mc->GetNuisanceParameters()) - poiAndNuisance->add(*mc->GetNuisanceParameters()); - poiAndNuisance->add(*mc->GetParametersOfInterest()); - w->saveSnapshot("paramsToGenerateData",*poiAndNuisance); - RooArgSet* paramsToGenerateData = (RooArgSet*) poiAndNuisance->snapshot(); - cout << "\nWill use these parameter points to generate pseudo data for bkg only" << endl; - paramsToGenerateData->Print("v"); - - - RooArgSet unconditionalObs; - unconditionalObs.add(*mc->GetObservables()); - unconditionalObs.add(*mc->GetGlobalObservables()); // comment this out for the original conditional ensemble - - double CLb=0; - double CLbinclusive=0; - - // Now we generate background only and find distribution of upper limits - TH1F* histOfUL = new TH1F("histOfUL","",100,0,firstPOI->getMax()); - histOfUL->GetXaxis()->SetTitle("Upper Limit (background only)"); - histOfUL->GetYaxis()->SetTitle("Entries"); - for(int imc=0; imcloadSnapshot("paramsToGenerateData"); - // 
poiAndNuisance->Print("v"); - - RooDataSet* toyData = 0; - // now generate a toy dataset for the main measurement - if(!mc->GetPdf()->canBeExtended()){ - if(data->numEntries()==1) - toyData = mc->GetPdf()->generate(*mc->GetObservables(),1); - else - cout <<"Not sure what to do about this model" <GetPdf()->generate(*mc->GetObservables(),Extended()); - } - - // generate global observables - // need to be careful for simpdf. - // In ROOT 5.28 there is a problem with generating global observables - // with a simultaneous PDF. In 5.29 there is a solution with - // RooSimultaneous::generateSimGlobal, but this may change to - // the standard generate interface in 5.30. - - RooSimultaneous* simPdf = dynamic_cast(mc->GetPdf()); - if(!simPdf){ - RooDataSet *one = mc->GetPdf()->generate(*mc->GetGlobalObservables(), 1); - const RooArgSet *values = one->get(); - RooArgSet *allVars = mc->GetPdf()->getVariables(); - *allVars = *values; - delete allVars; - delete one; - } else { - RooDataSet* one = simPdf->generateSimGlobal(*mc->GetGlobalObservables(),1); - const RooArgSet *values = one->get(); - RooArgSet *allVars = mc->GetPdf()->getVariables(); - *allVars = *values; - delete allVars; - delete one; - - } - - - // get test stat at observed UL in observed data - firstPOI->setVal(observedUL); - double toyTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); - // toyData->get()->Print("v"); - // cout <<"obsTSatObsUL " <numEntries(); ++i){ + cout << "Found data and ModelConfig:" <Print(); + + ///////////////////////////////////////////////////////////// + // Now get the POI for convenience + // you may want to adjust the range of your POI + //////////////////////////////////////////////////////////// + RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); + // firstPOI->setMin(0); + // firstPOI->setMax(10); + + ///////////////////////////////////////////// + // create and use the FeldmanCousins tool + // to find and plot the 95% confidence 
interval + // on the parameter of interest as specified + // in the model config + // REMEMBER, we will change the test statistic + // so this is NOT a Feldman-Cousins interval + FeldmanCousins fc(*data,*mc); + fc.SetConfidenceLevel(confidenceLevel); + fc.AdditionalNToysFactor(additionalToysFac); // improve sampling that defines confidence belt + // fc.UseAdaptiveSampling(true); // speed it up a bit, but don't use for expectd limits + fc.SetNBins(nPointsToScan); // set how many points per parameter of interest to scan + fc.CreateConfBelt(true); // save the information in the belt for plotting + + ///////////////////////////////////////////// + // Feldman-Cousins is a unified limit by definition + // but the tool takes care of a few things for us like which values + // of the nuisance parameters should be used to generate toys. + // so let's just change the test statistic and realize this is + // no longer "Feldman-Cousins" but is a fully frequentist Neyman-Construction. + // fc.GetTestStatSampler()->SetTestStatistic(&onesided); + // ((ToyMCSampler*) fc.GetTestStatSampler())->SetGenerateBinned(true); + ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); + ProfileLikelihoodTestStat* testStat = dynamic_cast(toymcsampler->GetTestStatistic()); + + // Since this tool needs to throw toy MC the PDF needs to be + // extended or the tool needs to know how many entries in a dataset + // per pseudo experiment. + // In the 'number counting form' where the entries in the dataset + // are counts, and not values of discriminating variables, the + // datasets typically only have one entry and the PDF is not + // extended. 
+ if(!mc->GetPdf()->canBeExtended()){ + if(data->numEntries()==1) + fc.FluctuateNumDataEntries(false); + else + cout <<"Not sure what to do about this model" <SetProofConfig(&pc); // enable proof + } + + if(mc->GetGlobalObservables()){ + cout << "will use global observables for unconditional ensemble"<GetGlobalObservables()->Print(); + toymcsampler->SetGlobalObservables(*mc->GetGlobalObservables()); + } + + + // Now get the interval + PointSetInterval* interval = fc.GetInterval(); + ConfidenceBelt* belt = fc.GetConfidenceBelt(); + + // print out the iterval on the first Parameter of Interest + cout << "\n95% interval on " <GetName()<<" is : ["<< + interval->LowerLimit(*firstPOI) << ", "<< + interval->UpperLimit(*firstPOI) <<"] "<UpperLimit(*firstPOI); + firstPOI->setVal(observedUL); + double obsTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*data,tmpPOI); + + + // Ask the calculator which points were scanned + RooDataSet* parameterScan = (RooDataSet*) fc.GetPointsToScan(); + RooArgSet* tmpPoint; + + // make a histogram of parameter vs. threshold + TH1F* histOfThresholds = new TH1F("histOfThresholds","", + parameterScan->numEntries(), + firstPOI->getMin(), + firstPOI->getMax()); + histOfThresholds->GetXaxis()->SetTitle(firstPOI->GetName()); + histOfThresholds->GetYaxis()->SetTitle("Threshold"); + + // loop through the points that were tested and ask confidence belt + // what the upper/lower thresholds were. 
+ // For FeldmanCousins, the lower cut off is always 0 + for(Int_t i=0; inumEntries(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); + //cout <<"get threshold"<GetAcceptanceRegionMax(*tmpPoint); - firstPOI->setVal( tmpPoint->getRealValue(firstPOI->GetName()) ); - // double thisTS = profile->getVal(); - double thisTS = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); - - // cout << "poi = " << firstPOI->getVal() - // << " max is " << arMax << " this profile = " << thisTS << endl; - // cout << "thisTS = " << thisTS<getVal(); + double poiVal = tmpPoint->getRealValue(firstPOI->GetName()) ; + histOfThresholds->Fill(poiVal,arMax); + } + TCanvas* c1 = new TCanvas(); + c1->Divide(2); + c1->cd(1); + histOfThresholds->SetMinimum(0); + histOfThresholds->Draw(); + c1->cd(2); + + ///////////////////////////////////////////////////////////// + // Now we generate the expected bands and power-constriant + //////////////////////////////////////////////////////////// + + // First: find parameter point for mu=0, with conditional MLEs for nuisance parameters + RooAbsReal* nll = mc->GetPdf()->createNLL(*data); + RooAbsReal* profile = nll->createProfile(*mc->GetParametersOfInterest()); + firstPOI->setVal(0.); + profile->getVal(); // this will do fit and set nuisance parameters to profiled values + RooArgSet* poiAndNuisance = new RooArgSet(); + if(mc->GetNuisanceParameters()) + poiAndNuisance->add(*mc->GetNuisanceParameters()); + poiAndNuisance->add(*mc->GetParametersOfInterest()); + w->saveSnapshot("paramsToGenerateData",*poiAndNuisance); + RooArgSet* paramsToGenerateData = (RooArgSet*) poiAndNuisance->snapshot(); + cout << "\nWill use these parameter points to generate pseudo data for bkg only" << endl; + paramsToGenerateData->Print("v"); + + + RooArgSet unconditionalObs; + unconditionalObs.add(*mc->GetObservables()); + unconditionalObs.add(*mc->GetGlobalObservables()); // comment this out for the original conditional ensemble + + double CLb=0; + 
double CLbinclusive=0; + + // Now we generate background only and find distribution of upper limits + TH1F* histOfUL = new TH1F("histOfUL","",100,0,firstPOI->getMax()); + histOfUL->GetXaxis()->SetTitle("Upper Limit (background only)"); + histOfUL->GetYaxis()->SetTitle("Entries"); + for(int imc=0; imcloadSnapshot("paramsToGenerateData"); + // poiAndNuisance->Print("v"); + + RooDataSet* toyData = 0; + // now generate a toy dataset for the main measurement + if(!mc->GetPdf()->canBeExtended()){ + if(data->numEntries()==1) + toyData = mc->GetPdf()->generate(*mc->GetObservables(),1); + else + cout <<"Not sure what to do about this model" <GetPdf()->generate(*mc->GetObservables(),Extended()); } - } - - - histOfUL->Fill(thisUL); - - // for few events, data is often the same, and UL is often the same - // cout << "thisUL = " << thisUL<Draw(); - c1->SaveAs("two-sided_upper_limit_output.pdf"); - - // if you want to see a plot of the sampling distribution for a particular scan point: - /* - SamplingDistPlot sampPlot; - int indexInScan = 0; - tmpPoint = (RooArgSet*) parameterScan->get(indexInScan)->clone("temp"); - firstPOI->setVal( tmpPoint->getRealValue(firstPOI->GetName()) ); - toymcsampler->SetParametersForTestStat(tmpPOI); - SamplingDistribution* samp = toymcsampler->GetSamplingDistribution(*tmpPoint); - sampPlot.AddSamplingDistribution(samp); - sampPlot.Draw(); - */ - - // Now find bands and power constraint - Double_t* bins = histOfUL->GetIntegral(); - TH1F* cumulative = (TH1F*) histOfUL->Clone("cumulative"); - cumulative->SetContent(bins); - double band2sigDown=0, band1sigDown=0, bandMedian=0, band1sigUp=0,band2sigUp=0; - for(int i=1; i<=cumulative->GetNbinsX(); ++i){ - if(bins[i]GetBinCenter(i); - if(bins[i]GetBinCenter(i); - if(bins[i]<0.5) - bandMedian=cumulative->GetBinCenter(i); - if(bins[i]GetBinCenter(i); - if(bins[i]GetBinCenter(i); - } - cout << "-2 sigma band " << band2sigDown << endl; - cout << "-1 sigma band " << band1sigDown << " [Power Constriant)]" << 
endl; - cout << "median of band " << bandMedian << endl; - cout << "+1 sigma band " << band1sigUp << endl; - cout << "+2 sigma band " << band2sigUp << endl; - - // print out the iterval on the first Parameter of Interest - cout << "\nobserved 95% upper-limit "<< interval->UpperLimit(*firstPOI) <obs|0)] for observed 95% upper-limit "<< CLb <=obs|0)] for observed 95% upper-limit "<< CLbinclusive <(mc->GetPdf()); + if(!simPdf){ + RooDataSet *one = mc->GetPdf()->generate(*mc->GetGlobalObservables(), 1); + const RooArgSet *values = one->get(); + RooArgSet *allVars = mc->GetPdf()->getVariables(); + *allVars = *values; + delete allVars; + delete one; + } else { + RooDataSet* one = simPdf->generateSimGlobal(*mc->GetGlobalObservables(),1); + const RooArgSet *values = one->get(); + RooArgSet *allVars = mc->GetPdf()->getVariables(); + *allVars = *values; + delete allVars; + delete one; + + } + + + // get test stat at observed UL in observed data + firstPOI->setVal(observedUL); + double toyTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); + // toyData->get()->Print("v"); + // cout <<"obsTSatObsUL " <numEntries(); ++i){ + tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); + double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); + firstPOI->setVal( tmpPoint->getRealValue(firstPOI->GetName()) ); + // double thisTS = profile->getVal(); + double thisTS = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); + + // cout << "poi = " << firstPOI->getVal() + // << " max is " << arMax << " this profile = " << thisTS << endl; + // cout << "thisTS = " << thisTS<getVal(); + } else{ + break; + } + } + + + histOfUL->Fill(thisUL); + + // for few events, data is often the same, and UL is often the same + // cout << "thisUL = " << thisUL<Draw(); + c1->SaveAs("two-sided_upper_limit_output.pdf"); + + // if you want to see a plot of the sampling distribution for a particular scan point: + /* + SamplingDistPlot sampPlot; + int indexInScan = 0; + 
tmpPoint = (RooArgSet*) parameterScan->get(indexInScan)->clone("temp"); + firstPOI->setVal( tmpPoint->getRealValue(firstPOI->GetName()) ); + toymcsampler->SetParametersForTestStat(tmpPOI); + SamplingDistribution* samp = toymcsampler->GetSamplingDistribution(*tmpPoint); + sampPlot.AddSamplingDistribution(samp); + sampPlot.Draw(); + */ + + // Now find bands and power constraint + Double_t* bins = histOfUL->GetIntegral(); + TH1F* cumulative = (TH1F*) histOfUL->Clone("cumulative"); + cumulative->SetContent(bins); + double band2sigDown=0, band1sigDown=0, bandMedian=0, band1sigUp=0,band2sigUp=0; + for(int i=1; i<=cumulative->GetNbinsX(); ++i){ + if(bins[i]GetBinCenter(i); + if(bins[i]GetBinCenter(i); + if(bins[i]<0.5) + bandMedian=cumulative->GetBinCenter(i); + if(bins[i]GetBinCenter(i); + if(bins[i]GetBinCenter(i); + } + cout << "-2 sigma band " << band2sigDown << endl; + cout << "-1 sigma band " << band1sigDown << " [Power Constriant)]" << endl; + cout << "median of band " << bandMedian << endl; + cout << "+1 sigma band " << band1sigUp << endl; + cout << "+2 sigma band " << band2sigUp << endl; + + // print out the iterval on the first Parameter of Interest + cout << "\nobserved 95% upper-limit "<< interval->UpperLimit(*firstPOI) <obs|0)] for observed 95% upper-limit "<< CLb <=obs|0)] for observed 95% upper-limit "<< CLbinclusive <factory("Poisson::px(x[150,0,500],sum::splusb(s[0,0,100],b[100,0,300]))"); - w->factory("Poisson::py(y[100,0,500],prod::taub(tau[1.],b))"); - w->factory("Uniform::prior_b(b)"); + // Make model for prototype on/off problem + // Pois(x | s+b) * Pois(y | tau b ) + // for Z_Gamma, use uniform prior on b. + RooWorkspace* w = new RooWorkspace("w",true); + w->factory("Poisson::px(x[150,0,500],sum::splusb(s[0,0,100],b[100,0,300]))"); + w->factory("Poisson::py(y[100,0,500],prod::taub(tau[1.],b))"); + w->factory("Uniform::prior_b(b)"); - // construct the Bayesian-averaged model (eg. 
a projection pdf) - // p'(x|s) = \int db p(x|s+b) * [ p(y|b) * prior(b) ] - w->factory("PROJ::averagedModel(PROD::foo(px|b,py,prior_b),b)") ; + // construct the Bayesian-averaged model (eg. a projection pdf) + // p'(x|s) = \int db p(x|s+b) * [ p(y|b) * prior(b) ] + w->factory("PROJ::averagedModel(PROD::foo(px|b,py,prior_b),b)") ; - // plot it, blue is averaged model, red is b known exactly - RooPlot* frame = w->var("x")->frame() ; - w->pdf("averagedModel")->plotOn(frame) ; - w->pdf("px")->plotOn(frame,LineColor(kRed)) ; - frame->Draw() ; + // plot it, blue is averaged model, red is b known exactly + RooPlot* frame = w->var("x")->frame() ; + w->pdf("averagedModel")->plotOn(frame) ; + w->pdf("px")->plotOn(frame,LineColor(kRed)) ; + frame->Draw() ; - // compare analytic calculation of Z_Bi - // with the numerical RooFit implementation of Z_Gamma - // for an example with x = 150, y = 100 + // compare analytic calculation of Z_Bi + // with the numerical RooFit implementation of Z_Gamma + // for an example with x = 150, y = 100 - // numeric RooFit Z_Gamma - w->var("y")->setVal(100); - w->var("x")->setVal(150); - RooAbsReal* cdf = w->pdf("averagedModel")->createCdf(*w->var("x")); - cdf->getVal(); // get ugly print messages out of the way + // numeric RooFit Z_Gamma + w->var("y")->setVal(100); + w->var("x")->setVal(150); + RooAbsReal* cdf = w->pdf("averagedModel")->createCdf(*w->var("x")); + cdf->getVal(); // get ugly print messages out of the way - cout << "Hybrid p-value = " << cdf->getVal() << endl; - cout << "Z_Gamma Significance = " << - PValueToSignificance(1-cdf->getVal()) << endl; + cout << "Hybrid p-value = " << cdf->getVal() << endl; + cout << "Z_Gamma Significance = " << + PValueToSignificance(1-cdf->getVal()) << endl; - // analytic Z_Bi - double Z_Bi = NumberCountingUtils::BinomialWithTauObsZ(150, 100, 1); - std::cout << "Z_Bi significance estimation: " << Z_Bi << std::endl; + // analytic Z_Bi + double Z_Bi = NumberCountingUtils::BinomialWithTauObsZ(150, 100, 
1); + std::cout << "Z_Bi significance estimation: " << Z_Bi << std::endl; - // OUTPUT - // Hybrid p-value = 0.999058 - // Z_Gamma Significance = 3.10804 - // Z_Bi significance estimation: 3.10804 + // OUTPUT + // Hybrid p-value = 0.999058 + // Z_Gamma Significance = 3.10804 + // Z_Bi significance estimation: 3.10804 } diff --git a/tutorials/roostats/rs101_limitexample.C b/tutorials/roostats/rs101_limitexample.C index 0ba34121a8939..3fea9f4f7742b 100644 --- a/tutorials/roostats/rs101_limitexample.C +++ b/tutorials/roostats/rs101_limitexample.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// 'Limit Example' RooStats tutorial macro #101 /// This tutorial shows an example of creating a simple /// model for a number counting experiment with uncertainty @@ -12,10 +13,6 @@ /// /// \author Kyle Cranmer -#ifndef __CINT__ -#include "RooGlobalFunc.h" -#endif - #include "RooProfileLL.h" #include "RooAbsPdf.h" #include "RooStats/HypoTestResult.h" @@ -47,194 +44,194 @@ #include "TGraph2D.h" // use this order for safety on library loading -using namespace RooFit ; -using namespace RooStats ; +using namespace RooFit; +using namespace RooStats; void rs101_limitexample() { - ///////////////////////////////////////// - // An example of setting a limit in a number counting experiment with uncertainty on background and signal - ///////////////////////////////////////// - - // to time the macro - TStopwatch t; - t.Start(); - - ///////////////////////////////////////// - // The Model building stage - ///////////////////////////////////////// - RooWorkspace* wspace = new RooWorkspace(); - wspace->factory("Poisson::countingModel(obs[150,0,300], sum(s[50,0,120]*ratioSigEff[1.,0,3.],b[100]*ratioBkgEff[1.,0.,3.]))"); // counting model - // wspace->factory("Gaussian::sigConstraint(ratioSigEff,1,0.05)"); // 5% signal efficiency uncertainty - // wspace->factory("Gaussian::bkgConstraint(ratioBkgEff,1,0.1)"); // 10% background efficiency uncertainty - 
wspace->factory("Gaussian::sigConstraint(gSigEff[1,0,3],ratioSigEff,0.05)"); // 5% signal efficiency uncertainty - wspace->factory("Gaussian::bkgConstraint(gSigBkg[1,0,3],ratioBkgEff,0.2)"); // 10% background efficiency uncertainty - wspace->factory("PROD::modelWithConstraints(countingModel,sigConstraint,bkgConstraint)"); // product of terms - wspace->Print(); - - RooAbsPdf* modelWithConstraints = wspace->pdf("modelWithConstraints"); // get the model - RooRealVar* obs = wspace->var("obs"); // get the observable - RooRealVar* s = wspace->var("s"); // get the signal we care about - RooRealVar* b = wspace->var("b"); // get the background and set it to a constant. Uncertainty included in ratioBkgEff - b->setConstant(); - - RooRealVar* ratioSigEff = wspace->var("ratioSigEff"); // get uncertaint parameter to constrain - RooRealVar* ratioBkgEff = wspace->var("ratioBkgEff"); // get uncertaint parameter to constrain - RooArgSet constrainedParams(*ratioSigEff, *ratioBkgEff); // need to constrain these in the fit (should change default behavior) - - RooRealVar * gSigEff = wspace->var("gSigEff"); // global observables for signal efficiency - RooRealVar * gSigBkg = wspace->var("gSigBkg"); // global obs for background efficiency - gSigEff->setConstant(); - gSigBkg->setConstant(); - - // Create an example dataset with 160 observed events - obs->setVal(160.); - RooDataSet* data = new RooDataSet("exampleData", "exampleData", RooArgSet(*obs)); - data->add(*obs); - - RooArgSet all(*s, *ratioBkgEff, *ratioSigEff); - - // not necessary - modelWithConstraints->fitTo(*data, RooFit::Constrain(RooArgSet(*ratioSigEff, *ratioBkgEff))); - - // Now let's make some confidence intervals for s, our parameter of interest - RooArgSet paramOfInterest(*s); - - ModelConfig modelConfig(wspace); - modelConfig.SetPdf(*modelWithConstraints); - modelConfig.SetParametersOfInterest(paramOfInterest); - modelConfig.SetNuisanceParameters(constrainedParams); - modelConfig.SetObservables(*obs); - 
modelConfig.SetGlobalObservables( RooArgSet(*gSigEff,*gSigBkg)); - modelConfig.SetName("ModelConfig"); - wspace->import(modelConfig); - wspace->import(*data); - wspace->SetName("w"); - wspace->writeToFile("rs101_ws.root"); - - - - // First, let's use a Calculator based on the Profile Likelihood Ratio - //ProfileLikelihoodCalculator plc(*data, *modelWithConstraints, paramOfInterest); - ProfileLikelihoodCalculator plc(*data, modelConfig); - plc.SetTestSize(.05); - ConfInterval* lrint = plc.GetInterval(); // that was easy. - - // Let's make a plot - TCanvas* dataCanvas = new TCanvas("dataCanvas"); - dataCanvas->Divide(2,1); - - dataCanvas->cd(1); - LikelihoodIntervalPlot plotInt((LikelihoodInterval*)lrint); - plotInt.SetTitle("Profile Likelihood Ratio and Posterior for S"); - plotInt.Draw(); - - // Second, use a Calculator based on the Feldman Cousins technique - FeldmanCousins fc(*data, modelConfig); - fc.UseAdaptiveSampling(true); - fc.FluctuateNumDataEntries(false); // number counting analysis: dataset always has 1 entry with N events observed - fc.SetNBins(100); // number of points to test per parameter - fc.SetTestSize(.05); - // fc.SaveBeltToFile(true); // optional - ConfInterval* fcint = NULL; - fcint = fc.GetInterval(); // that was easy. 
- - RooFitResult* fit = modelWithConstraints->fitTo(*data, Save(true)); - - // Third, use a Calculator based on Markov Chain monte carlo - // Before configuring the calculator, let's make a ProposalFunction - // that will achieve a high acceptance rate - ProposalHelper ph; - ph.SetVariables((RooArgSet&)fit->floatParsFinal()); - ph.SetCovMatrix(fit->covarianceMatrix()); - ph.SetUpdateProposalParameters(true); - ph.SetCacheSize(100); - ProposalFunction* pdfProp = ph.GetProposalFunction(); // that was easy - - MCMCCalculator mc(*data, modelConfig); - mc.SetNumIters(20000); // steps to propose in the chain - mc.SetTestSize(.05); // 95% CL - mc.SetNumBurnInSteps(40); // ignore first N steps in chain as "burn in" - mc.SetProposalFunction(*pdfProp); - mc.SetLeftSideTailFraction(0.5); // find a "central" interval - MCMCInterval* mcInt = (MCMCInterval*)mc.GetInterval(); // that was easy - - - // Get Lower and Upper limits from Profile Calculator - cout << "Profile lower limit on s = " << ((LikelihoodInterval*) lrint)->LowerLimit(*s) << endl; - cout << "Profile upper limit on s = " << ((LikelihoodInterval*) lrint)->UpperLimit(*s) << endl; - - // Get Lower and Upper limits from FeldmanCousins with profile construction - if (fcint != NULL) { - double fcul = ((PointSetInterval*) fcint)->UpperLimit(*s); - double fcll = ((PointSetInterval*) fcint)->LowerLimit(*s); - cout << "FC lower limit on s = " << fcll << endl; - cout << "FC upper limit on s = " << fcul << endl; - TLine* fcllLine = new TLine(fcll, 0, fcll, 1); - TLine* fculLine = new TLine(fcul, 0, fcul, 1); - fcllLine->SetLineColor(kRed); - fculLine->SetLineColor(kRed); - fcllLine->Draw("same"); - fculLine->Draw("same"); - dataCanvas->Update(); - } - - // Plot MCMC interval and print some statistics - MCMCIntervalPlot mcPlot(*mcInt); - mcPlot.SetLineColor(kMagenta); - mcPlot.SetLineWidth(2); - mcPlot.Draw("same"); - - double mcul = mcInt->UpperLimit(*s); - double mcll = mcInt->LowerLimit(*s); - cout << "MCMC lower limit on s 
= " << mcll << endl; - cout << "MCMC upper limit on s = " << mcul << endl; - cout << "MCMC Actual confidence level: " - << mcInt->GetActualConfidenceLevel() << endl; - - // 3-d plot of the parameter points - dataCanvas->cd(2); - // also plot the points in the markov chain - RooDataSet * chainData = mcInt->GetChainAsDataSet(); - - assert(chainData); - std::cout << "plotting the chain data - nentries = " << chainData->numEntries() << std::endl; - TTree* chain = RooStats::GetAsTTree("chainTreeData","chainTreeData",*chainData); - assert(chain); - chain->SetMarkerStyle(6); - chain->SetMarkerColor(kRed); - - chain->Draw("s:ratioSigEff:ratioBkgEff","nll_MarkovChain_local_","box"); // 3-d box proporional to posterior - - // the points used in the profile construction - RooDataSet * parScanData = (RooDataSet*) fc.GetPointsToScan(); - assert(parScanData); - std::cout << "plotting the scanned points used in the frequentist construction - npoints = " << parScanData->numEntries() << std::endl; - // getting the tree and drawing it -crashes (very strange....); - // TTree* parameterScan = RooStats::GetAsTTree("parScanTreeData","parScanTreeData",*parScanData); - // assert(parameterScan); - // parameterScan->Draw("s:ratioSigEff:ratioBkgEff","","goff"); - TGraph2D *gr = new TGraph2D(parScanData->numEntries()); - for (int ievt = 0; ievt < parScanData->numEntries(); ++ievt) { - const RooArgSet * evt = parScanData->get(ievt); - double x = evt->getRealValue("ratioBkgEff"); - double y = evt->getRealValue("ratioSigEff"); - double z = evt->getRealValue("s"); - gr->SetPoint(ievt, x,y,z); - // std::cout << ievt << " " << x << " " << y << " " << z << std::endl; - } - gr->SetMarkerStyle(24); - gr->Draw("P SAME"); - - - delete wspace; - delete lrint; - delete mcInt; - delete fcint; - delete data; - - /// print timing info - t.Stop(); - t.Print(); + ///////////////////////////////////////// + // An example of setting a limit in a number counting experiment with uncertainty on background and 
signal + ///////////////////////////////////////// + + // to time the macro + TStopwatch t; + t.Start(); + + ///////////////////////////////////////// + // The Model building stage + ///////////////////////////////////////// + RooWorkspace* wspace = new RooWorkspace(); + wspace->factory("Poisson::countingModel(obs[150,0,300], sum(s[50,0,120]*ratioSigEff[1.,0,3.],b[100]*ratioBkgEff[1.,0.,3.]))"); // counting model + // wspace->factory("Gaussian::sigConstraint(ratioSigEff,1,0.05)"); // 5% signal efficiency uncertainty + // wspace->factory("Gaussian::bkgConstraint(ratioBkgEff,1,0.1)"); // 10% background efficiency uncertainty + wspace->factory("Gaussian::sigConstraint(gSigEff[1,0,3],ratioSigEff,0.05)"); // 5% signal efficiency uncertainty + wspace->factory("Gaussian::bkgConstraint(gSigBkg[1,0,3],ratioBkgEff,0.2)"); // 10% background efficiency uncertainty + wspace->factory("PROD::modelWithConstraints(countingModel,sigConstraint,bkgConstraint)"); // product of terms + wspace->Print(); + + RooAbsPdf* modelWithConstraints = wspace->pdf("modelWithConstraints"); // get the model + RooRealVar* obs = wspace->var("obs"); // get the observable + RooRealVar* s = wspace->var("s"); // get the signal we care about + RooRealVar* b = wspace->var("b"); // get the background and set it to a constant. 
Uncertainty included in ratioBkgEff + b->setConstant(); + + RooRealVar* ratioSigEff = wspace->var("ratioSigEff"); // get uncertaint parameter to constrain + RooRealVar* ratioBkgEff = wspace->var("ratioBkgEff"); // get uncertaint parameter to constrain + RooArgSet constrainedParams(*ratioSigEff, *ratioBkgEff); // need to constrain these in the fit (should change default behavior) + + RooRealVar * gSigEff = wspace->var("gSigEff"); // global observables for signal efficiency + RooRealVar * gSigBkg = wspace->var("gSigBkg"); // global obs for background efficiency + gSigEff->setConstant(); + gSigBkg->setConstant(); + + // Create an example dataset with 160 observed events + obs->setVal(160.); + RooDataSet* data = new RooDataSet("exampleData", "exampleData", RooArgSet(*obs)); + data->add(*obs); + + RooArgSet all(*s, *ratioBkgEff, *ratioSigEff); + + // not necessary + modelWithConstraints->fitTo(*data, RooFit::Constrain(RooArgSet(*ratioSigEff, *ratioBkgEff))); + + // Now let's make some confidence intervals for s, our parameter of interest + RooArgSet paramOfInterest(*s); + + ModelConfig modelConfig(wspace); + modelConfig.SetPdf(*modelWithConstraints); + modelConfig.SetParametersOfInterest(paramOfInterest); + modelConfig.SetNuisanceParameters(constrainedParams); + modelConfig.SetObservables(*obs); + modelConfig.SetGlobalObservables( RooArgSet(*gSigEff,*gSigBkg)); + modelConfig.SetName("ModelConfig"); + wspace->import(modelConfig); + wspace->import(*data); + wspace->SetName("w"); + wspace->writeToFile("rs101_ws.root"); + + + + // First, let's use a Calculator based on the Profile Likelihood Ratio + //ProfileLikelihoodCalculator plc(*data, *modelWithConstraints, paramOfInterest); + ProfileLikelihoodCalculator plc(*data, modelConfig); + plc.SetTestSize(.05); + ConfInterval* lrint = plc.GetInterval(); // that was easy. 
+ + // Let's make a plot + TCanvas* dataCanvas = new TCanvas("dataCanvas"); + dataCanvas->Divide(2,1); + + dataCanvas->cd(1); + LikelihoodIntervalPlot plotInt((LikelihoodInterval*)lrint); + plotInt.SetTitle("Profile Likelihood Ratio and Posterior for S"); + plotInt.Draw(); + + // Second, use a Calculator based on the Feldman Cousins technique + FeldmanCousins fc(*data, modelConfig); + fc.UseAdaptiveSampling(true); + fc.FluctuateNumDataEntries(false); // number counting analysis: dataset always has 1 entry with N events observed + fc.SetNBins(100); // number of points to test per parameter + fc.SetTestSize(.05); + // fc.SaveBeltToFile(true); // optional + ConfInterval* fcint = NULL; + fcint = fc.GetInterval(); // that was easy. + + RooFitResult* fit = modelWithConstraints->fitTo(*data, Save(true)); + + // Third, use a Calculator based on Markov Chain monte carlo + // Before configuring the calculator, let's make a ProposalFunction + // that will achieve a high acceptance rate + ProposalHelper ph; + ph.SetVariables((RooArgSet&)fit->floatParsFinal()); + ph.SetCovMatrix(fit->covarianceMatrix()); + ph.SetUpdateProposalParameters(true); + ph.SetCacheSize(100); + ProposalFunction* pdfProp = ph.GetProposalFunction(); // that was easy + + MCMCCalculator mc(*data, modelConfig); + mc.SetNumIters(20000); // steps to propose in the chain + mc.SetTestSize(.05); // 95% CL + mc.SetNumBurnInSteps(40); // ignore first N steps in chain as "burn in" + mc.SetProposalFunction(*pdfProp); + mc.SetLeftSideTailFraction(0.5); // find a "central" interval + MCMCInterval* mcInt = (MCMCInterval*)mc.GetInterval(); // that was easy + + + // Get Lower and Upper limits from Profile Calculator + cout << "Profile lower limit on s = " << ((LikelihoodInterval*) lrint)->LowerLimit(*s) << endl; + cout << "Profile upper limit on s = " << ((LikelihoodInterval*) lrint)->UpperLimit(*s) << endl; + + // Get Lower and Upper limits from FeldmanCousins with profile construction + if (fcint != NULL) { + double 
fcul = ((PointSetInterval*) fcint)->UpperLimit(*s); + double fcll = ((PointSetInterval*) fcint)->LowerLimit(*s); + cout << "FC lower limit on s = " << fcll << endl; + cout << "FC upper limit on s = " << fcul << endl; + TLine* fcllLine = new TLine(fcll, 0, fcll, 1); + TLine* fculLine = new TLine(fcul, 0, fcul, 1); + fcllLine->SetLineColor(kRed); + fculLine->SetLineColor(kRed); + fcllLine->Draw("same"); + fculLine->Draw("same"); + dataCanvas->Update(); + } + + // Plot MCMC interval and print some statistics + MCMCIntervalPlot mcPlot(*mcInt); + mcPlot.SetLineColor(kMagenta); + mcPlot.SetLineWidth(2); + mcPlot.Draw("same"); + + double mcul = mcInt->UpperLimit(*s); + double mcll = mcInt->LowerLimit(*s); + cout << "MCMC lower limit on s = " << mcll << endl; + cout << "MCMC upper limit on s = " << mcul << endl; + cout << "MCMC Actual confidence level: " + << mcInt->GetActualConfidenceLevel() << endl; + + // 3-d plot of the parameter points + dataCanvas->cd(2); + // also plot the points in the markov chain + RooDataSet * chainData = mcInt->GetChainAsDataSet(); + + assert(chainData); + std::cout << "plotting the chain data - nentries = " << chainData->numEntries() << std::endl; + TTree* chain = RooStats::GetAsTTree("chainTreeData","chainTreeData",*chainData); + assert(chain); + chain->SetMarkerStyle(6); + chain->SetMarkerColor(kRed); + + chain->Draw("s:ratioSigEff:ratioBkgEff","nll_MarkovChain_local_","box"); // 3-d box proporional to posterior + + // the points used in the profile construction + RooDataSet * parScanData = (RooDataSet*) fc.GetPointsToScan(); + assert(parScanData); + std::cout << "plotting the scanned points used in the frequentist construction - npoints = " << parScanData->numEntries() << std::endl; + // getting the tree and drawing it -crashes (very strange....); + // TTree* parameterScan = RooStats::GetAsTTree("parScanTreeData","parScanTreeData",*parScanData); + // assert(parameterScan); + // parameterScan->Draw("s:ratioSigEff:ratioBkgEff","","goff"); + 
TGraph2D *gr = new TGraph2D(parScanData->numEntries()); + for (int ievt = 0; ievt < parScanData->numEntries(); ++ievt) { + const RooArgSet * evt = parScanData->get(ievt); + double x = evt->getRealValue("ratioBkgEff"); + double y = evt->getRealValue("ratioSigEff"); + double z = evt->getRealValue("s"); + gr->SetPoint(ievt, x,y,z); + // std::cout << ievt << " " << x << " " << y << " " << z << std::endl; + } + gr->SetMarkerStyle(24); + gr->Draw("P SAME"); + + + delete wspace; + delete lrint; + delete mcInt; + delete fcint; + delete data; + + /// print timing info + t.Stop(); + t.Print(); } diff --git a/tutorials/roostats/rs102_hypotestwithshapes.C b/tutorials/roostats/rs102_hypotestwithshapes.C index 2d4ad0c0fa752..f286fcd6daba8 100644 --- a/tutorials/roostats/rs102_hypotestwithshapes.C +++ b/tutorials/roostats/rs102_hypotestwithshapes.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// rs102_hypotestwithshapes for RooStats project /// /// This tutorial macro shows a typical search for a new particle @@ -18,9 +19,6 @@ /// /// \author Kyle Cranmer -#ifndef __CINT__ -#include "RooGlobalFunc.h" -#endif #include "RooDataSet.h" #include "RooRealVar.h" #include "RooGaussian.h" @@ -54,165 +52,164 @@ void MakePlots(RooWorkspace*); //____________________________________ void rs102_hypotestwithshapes() { - // The main macro. + // The main macro. - // Create a workspace to manage the project. - RooWorkspace* wspace = new RooWorkspace("myWS"); + // Create a workspace to manage the project. 
+ RooWorkspace* wspace = new RooWorkspace("myWS"); - // add the signal and background models to the workspace - AddModel(wspace); + // add the signal and background models to the workspace + AddModel(wspace); - // add some toy data to the workspace - AddData(wspace); + // add some toy data to the workspace + AddData(wspace); - // inspect the workspace if you wish - // wspace->Print(); + // inspect the workspace if you wish + // wspace->Print(); - // do the hypothesis test - DoHypothesisTest(wspace); + // do the hypothesis test + DoHypothesisTest(wspace); - // make some plots - MakePlots(wspace); + // make some plots + MakePlots(wspace); - // cleanup - delete wspace; + // cleanup + delete wspace; } //____________________________________ void AddModel(RooWorkspace* wks){ - // Make models for signal (Higgs) and background (Z+jets and QCD) - // In real life, this part requires an intelligent modeling - // of signal and background -- this is only an example. - - // set range of observable - Double_t lowRange = 60, highRange = 200; - - // make a RooRealVar for the observable - RooRealVar invMass("invMass", "M_{inv}", lowRange, highRange,"GeV"); - - - ///////////////////////////////////////////// - // make a simple signal model. - RooRealVar mH("mH","Higgs Mass",130,90,160) ; - RooRealVar sigma1("sigma1","Width of Gaussian",12.,2,100) ; - RooGaussian sigModel("sigModel", "Signal Model", invMass, mH, sigma1); - // we will test this specific mass point for the signal - mH.setConstant(); - // and we assume we know the mass resolution - sigma1.setConstant(); - - ///////////////////////////////////////////// - // make zjj model. 
Just like signal model - RooRealVar mZ("mZ", "Z Mass", 91.2, 0, 100); - RooRealVar sigma1_z("sigma1_z","Width of Gaussian",10.,6,100) ; - RooGaussian zjjModel("zjjModel", "Z+jets Model", invMass, mZ, sigma1_z); - // we know Z mass - mZ.setConstant(); - // assume we know resolution too - sigma1_z.setConstant(); - - - ////////////////////////////////////////////// - // make QCD model - RooRealVar a0("a0","a0",0.26,-1,1) ; - RooRealVar a1("a1","a1",-0.17596,-1,1) ; - RooRealVar a2("a2","a2",0.018437,-1,1) ; - RooRealVar a3("a3","a3",0.02,-1,1) ; - RooChebychev qcdModel("qcdModel","A Polynomail for QCD",invMass,RooArgList(a0,a1,a2)) ; - - // let's assume this shape is known, but the normalization is not - a0.setConstant(); - a1.setConstant(); - a2.setConstant(); - - ////////////////////////////////////////////// - // combined model - - // Setting the fraction of Zjj to be 40% for initial guess. - RooRealVar fzjj("fzjj","fraction of zjj background events",.4,0.,1) ; - - // Set the expected fraction of signal to 20%. - RooRealVar fsigExpected("fsigExpected","expected fraction of signal events",.2,0.,1) ; - fsigExpected.setConstant(); // use mu as main parameter, so fix this. - - // Introduce mu: the signal strength in units of the expectation. - // eg. mu = 1 is the SM, mu = 0 is no signal, mu=2 is 2x the SM - RooRealVar mu("mu","signal strength in units of SM expectation",1,0.,2) ; - - // Introduce ratio of signal efficiency to nominal signal efficiency. - // This is useful if you want to do limits on cross section. - RooRealVar ratioSigEff("ratioSigEff","ratio of signal efficiency to nominal signal efficiency",1. ,0.,2) ; - ratioSigEff.setConstant(kTRUE); - - // finally the signal fraction is the product of the terms above. 
- RooProduct fsig("fsig","fraction of signal events",RooArgSet(mu,ratioSigEff,fsigExpected)) ; - - // full model - RooAddPdf model("model","sig+zjj+qcd background shapes",RooArgList(sigModel,zjjModel, qcdModel),RooArgList(fsig,fzjj)) ; - - // interesting for debugging and visualizing the model - // model.printCompactTree("","fullModel.txt"); - // model.graphVizTree("fullModel.dot"); - - wks->import(model); + // Make models for signal (Higgs) and background (Z+jets and QCD) + // In real life, this part requires an intelligent modeling + // of signal and background -- this is only an example. + + // set range of observable + Double_t lowRange = 60, highRange = 200; + + // make a RooRealVar for the observable + RooRealVar invMass("invMass", "M_{inv}", lowRange, highRange,"GeV"); + + + ///////////////////////////////////////////// + // make a simple signal model. + RooRealVar mH("mH","Higgs Mass",130,90,160) ; + RooRealVar sigma1("sigma1","Width of Gaussian",12.,2,100) ; + RooGaussian sigModel("sigModel", "Signal Model", invMass, mH, sigma1); + // we will test this specific mass point for the signal + mH.setConstant(); + // and we assume we know the mass resolution + sigma1.setConstant(); + + ///////////////////////////////////////////// + // make zjj model. 
Just like signal model + RooRealVar mZ("mZ", "Z Mass", 91.2, 0, 100); + RooRealVar sigma1_z("sigma1_z","Width of Gaussian",10.,6,100) ; + RooGaussian zjjModel("zjjModel", "Z+jets Model", invMass, mZ, sigma1_z); + // we know Z mass + mZ.setConstant(); + // assume we know resolution too + sigma1_z.setConstant(); + + + ////////////////////////////////////////////// + // make QCD model + RooRealVar a0("a0","a0",0.26,-1,1) ; + RooRealVar a1("a1","a1",-0.17596,-1,1) ; + RooRealVar a2("a2","a2",0.018437,-1,1) ; + RooRealVar a3("a3","a3",0.02,-1,1) ; + RooChebychev qcdModel("qcdModel","A Polynomail for QCD",invMass,RooArgList(a0,a1,a2)) ; + + // let's assume this shape is known, but the normalization is not + a0.setConstant(); + a1.setConstant(); + a2.setConstant(); + + ////////////////////////////////////////////// + // combined model + + // Setting the fraction of Zjj to be 40% for initial guess. + RooRealVar fzjj("fzjj","fraction of zjj background events",.4,0.,1) ; + + // Set the expected fraction of signal to 20%. + RooRealVar fsigExpected("fsigExpected","expected fraction of signal events",.2,0.,1) ; + fsigExpected.setConstant(); // use mu as main parameter, so fix this. + + // Introduce mu: the signal strength in units of the expectation. + // eg. mu = 1 is the SM, mu = 0 is no signal, mu=2 is 2x the SM + RooRealVar mu("mu","signal strength in units of SM expectation",1,0.,2) ; + + // Introduce ratio of signal efficiency to nominal signal efficiency. + // This is useful if you want to do limits on cross section. + RooRealVar ratioSigEff("ratioSigEff","ratio of signal efficiency to nominal signal efficiency",1. ,0.,2) ; + ratioSigEff.setConstant(kTRUE); + + // finally the signal fraction is the product of the terms above. 
+ RooProduct fsig("fsig","fraction of signal events",RooArgSet(mu,ratioSigEff,fsigExpected)) ; + + // full model + RooAddPdf model("model","sig+zjj+qcd background shapes",RooArgList(sigModel,zjjModel, qcdModel),RooArgList(fsig,fzjj)) ; + + // interesting for debugging and visualizing the model + // model.printCompactTree("","fullModel.txt"); + // model.graphVizTree("fullModel.dot"); + + wks->import(model); } //____________________________________ void AddData(RooWorkspace* wks){ - // Add a toy dataset + // Add a toy dataset - Int_t nEvents = 150; - RooAbsPdf* model = wks->pdf("model"); - RooRealVar* invMass = wks->var("invMass"); + Int_t nEvents = 150; + RooAbsPdf* model = wks->pdf("model"); + RooRealVar* invMass = wks->var("invMass"); - RooDataSet* data = model->generate(*invMass,nEvents); + RooDataSet* data = model->generate(*invMass,nEvents); - wks->import(*data, Rename("data")); + wks->import(*data, Rename("data")); } //____________________________________ void DoHypothesisTest(RooWorkspace* wks){ + // Use a RooStats ProfileLikelihoodCalculator to do the hypothesis test. + ModelConfig model; + model.SetWorkspace(*wks); + model.SetPdf("model"); - // Use a RooStats ProfileLikleihoodCalculator to do the hypothesis test. - ModelConfig model; - model.SetWorkspace(*wks); - model.SetPdf("model"); + //plc.SetData("data"); - //plc.SetData("data"); + ProfileLikelihoodCalculator plc; + plc.SetData( *(wks->data("data") )); - ProfileLikelihoodCalculator plc; - plc.SetData( *(wks->data("data") )); + // here we explicitly set the value of the parameters for the null. + // We want no signal contribution, eg. mu = 0 + RooRealVar* mu = wks->var("mu"); + // RooArgSet* nullParams = new RooArgSet("nullParams"); + // nullParams->addClone(*mu); + RooArgSet poi(*mu); + RooArgSet * nullParams = (RooArgSet*) poi.snapshot(); + nullParams->setRealValue("mu",0); - // here we explicitly set the value of the parameters for the null. - // We want no signal contribution, eg. 
mu = 0 - RooRealVar* mu = wks->var("mu"); -// RooArgSet* nullParams = new RooArgSet("nullParams"); -// nullParams->addClone(*mu); - RooArgSet poi(*mu); - RooArgSet * nullParams = (RooArgSet*) poi.snapshot(); - nullParams->setRealValue("mu",0); + //plc.SetNullParameters(*nullParams); + plc.SetModel(model); + // NOTE: using snapshot will import nullparams + // in the WS and merge with existing "mu" + // model.SetSnapshot(*nullParams); - //plc.SetNullParameters(*nullParams); - plc.SetModel(model); - // NOTE: using snapshot will import nullparams - // in the WS and merge with existing "mu" - // model.SetSnapshot(*nullParams); + //use instead setNuisanceParameters + plc.SetNullParameters( *nullParams); - //use instead setNuisanceParameters - plc.SetNullParameters( *nullParams); - - // We get a HypoTestResult out of the calculator, and we can query it. - HypoTestResult* htr = plc.GetHypoTest(); - cout << "-------------------------------------------------" << endl; - cout << "The p-value for the null is " << htr->NullPValue() << endl; - cout << "Corresponding to a signifcance of " << htr->Significance() << endl; - cout << "-------------------------------------------------\n\n" << endl; + // We get a HypoTestResult out of the calculator, and we can query it. + HypoTestResult* htr = plc.GetHypoTest(); + cout << "-------------------------------------------------" << endl; + cout << "The p-value for the null is " << htr->NullPValue() << endl; + cout << "Corresponding to a signifcance of " << htr->Significance() << endl; + cout << "-------------------------------------------------\n\n" << endl; } @@ -220,60 +217,60 @@ void DoHypothesisTest(RooWorkspace* wks){ //____________________________________ void MakePlots(RooWorkspace* wks) { - // Make plots of the data and the best fit model in two cases: - // first the signal+background case - // second the background-only case. 
+ // Make plots of the data and the best fit model in two cases: + // first the signal+background case + // second the background-only case. - // get some things out of workspace - RooAbsPdf* model = wks->pdf("model"); - RooAbsPdf* sigModel = wks->pdf("sigModel"); - RooAbsPdf* zjjModel = wks->pdf("zjjModel"); - RooAbsPdf* qcdModel = wks->pdf("qcdModel"); + // get some things out of workspace + RooAbsPdf* model = wks->pdf("model"); + RooAbsPdf* sigModel = wks->pdf("sigModel"); + RooAbsPdf* zjjModel = wks->pdf("zjjModel"); + RooAbsPdf* qcdModel = wks->pdf("qcdModel"); - RooRealVar* mu = wks->var("mu"); - RooRealVar* invMass = wks->var("invMass"); - RooAbsData* data = wks->data("data"); + RooRealVar* mu = wks->var("mu"); + RooRealVar* invMass = wks->var("invMass"); + RooAbsData* data = wks->data("data"); - ////////////////////////////////////////////////////////// - // Make plots for the Alternate hypothesis, eg. let mu float + ////////////////////////////////////////////////////////// + // Make plots for the Alternate hypothesis, eg. 
let mu float - mu->setConstant(kFALSE); + mu->setConstant(kFALSE); - model->fitTo(*data,Save(kTRUE),Minos(kFALSE), Hesse(kFALSE),PrintLevel(-1)); + model->fitTo(*data,Save(kTRUE),Minos(kFALSE), Hesse(kFALSE),PrintLevel(-1)); - //plot sig candidates, full model, and individual components - new TCanvas(); - RooPlot* frame = invMass->frame() ; - data->plotOn(frame ) ; - model->plotOn(frame) ; - model->plotOn(frame,Components(*sigModel),LineStyle(kDashed), LineColor(kRed)) ; - model->plotOn(frame,Components(*zjjModel),LineStyle(kDashed),LineColor(kBlack)) ; - model->plotOn(frame,Components(*qcdModel),LineStyle(kDashed),LineColor(kGreen)) ; + //plot sig candidates, full model, and individual components + new TCanvas(); + RooPlot* frame = invMass->frame() ; + data->plotOn(frame ) ; + model->plotOn(frame) ; + model->plotOn(frame,Components(*sigModel),LineStyle(kDashed), LineColor(kRed)) ; + model->plotOn(frame,Components(*zjjModel),LineStyle(kDashed),LineColor(kBlack)) ; + model->plotOn(frame,Components(*qcdModel),LineStyle(kDashed),LineColor(kGreen)) ; - frame->SetTitle("An example fit to the signal + background model"); - frame->Draw() ; - // cdata->SaveAs("alternateFit.gif"); + frame->SetTitle("An example fit to the signal + background model"); + frame->Draw() ; + // cdata->SaveAs("alternateFit.gif"); - ////////////////////////////////////////////////////////// - // Do Fit to the Null hypothesis. Eg. fix mu=0 + ////////////////////////////////////////////////////////// + // Do Fit to the Null hypothesis. Eg. 
fix mu=0 - mu->setVal(0); // set signal fraction to 0 - mu->setConstant(kTRUE); // set constant + mu->setVal(0); // set signal fraction to 0 + mu->setConstant(kTRUE); // set constant - model->fitTo(*data, Save(kTRUE), Minos(kFALSE), Hesse(kFALSE),PrintLevel(-1)); + model->fitTo(*data, Save(kTRUE), Minos(kFALSE), Hesse(kFALSE),PrintLevel(-1)); - // plot signal candidates with background model and components - new TCanvas(); - RooPlot* xframe2 = invMass->frame() ; - data->plotOn(xframe2, DataError(RooAbsData::SumW2)) ; - model->plotOn(xframe2) ; - model->plotOn(xframe2, Components(*zjjModel),LineStyle(kDashed),LineColor(kBlack)) ; - model->plotOn(xframe2, Components(*qcdModel),LineStyle(kDashed),LineColor(kGreen)) ; + // plot signal candidates with background model and components + new TCanvas(); + RooPlot* xframe2 = invMass->frame() ; + data->plotOn(xframe2, DataError(RooAbsData::SumW2)) ; + model->plotOn(xframe2) ; + model->plotOn(xframe2, Components(*zjjModel),LineStyle(kDashed),LineColor(kBlack)) ; + model->plotOn(xframe2, Components(*qcdModel),LineStyle(kDashed),LineColor(kGreen)) ; - xframe2->SetTitle("An example fit to the background-only model"); - xframe2->Draw() ; - // cbkgonly->SaveAs("nullFit.gif"); + xframe2->SetTitle("An example fit to the background-only model"); + xframe2->Draw() ; + // cbkgonly->SaveAs("nullFit.gif"); } diff --git a/tutorials/roostats/rs301_splot.C b/tutorials/roostats/rs301_splot.C index 1ecd145e86b2a..9a19738a2f5a9 100644 --- a/tutorials/roostats/rs301_splot.C +++ b/tutorials/roostats/rs301_splot.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// SPlot tutorial /// /// This tutorial shows an example of using SPlot to unfold two distributions. 
@@ -22,9 +23,6 @@ /// /// \author Kyle Cranmer -#ifndef __CINT__ -#include "RooGlobalFunc.h" -#endif #include "RooRealVar.h" #include "RooStats/SPlot.h" #include "RooDataSet.h" @@ -44,8 +42,8 @@ #include "RooConstVar.h" // use this order for safety on library loading -using namespace RooFit ; -using namespace RooStats ; +using namespace RooFit; +using namespace RooStats; // see below for implementation @@ -57,29 +55,29 @@ void MakePlots(RooWorkspace*); void rs301_splot() { - // Create a new workspace to manage the project. - RooWorkspace* wspace = new RooWorkspace("myWS"); + // Create a new workspace to manage the project. + RooWorkspace* wspace = new RooWorkspace("myWS"); - // add the signal and background models to the workspace. - // Inside this function you will find a description our model. - AddModel(wspace); + // add the signal and background models to the workspace. + // Inside this function you will find a description of our model. + AddModel(wspace); - // add some toy data to the workspace - AddData(wspace); + // add some toy data to the workspace + AddData(wspace); - // inspect the workspace if you wish - // wspace->Print(); + // inspect the workspace if you wish + // wspace->Print(); - // do sPlot. - //This wil make a new dataset with sWeights added for every event. - DoSPlot(wspace); + // do sPlot. + //This will make a new dataset with sWeights added for every event. + DoSPlot(wspace); - // Make some plots showing the discriminating variable and - // the control variable after unfolding. - MakePlots(wspace); + // Make some plots showing the discriminating variable and + // the control variable after unfolding. 
+ MakePlots(wspace); - // cleanup - delete wspace; + // cleanup + delete wspace; } @@ -87,254 +85,254 @@ void rs301_splot() //____________________________________ void AddModel(RooWorkspace* ws){ - // Make models for signal (Higgs) and background (Z+jets and QCD) - // In real life, this part requires an intelligent modeling - // of signal and background -- this is only an example. - - // set range of observable - Double_t lowRange = 00, highRange = 200; - - // make a RooRealVar for the observables - RooRealVar invMass("invMass", "M_{inv}", lowRange, highRange,"GeV"); - RooRealVar isolation("isolation", "isolation", 0., 20., "GeV"); - - - ///////////////////////////////////////////// - // make 2-d model for Z including the invariant mass - // distribution and an isolation distribution which we want to - // unfold from QCD. - std::cout << "make z model" << std::endl; - // mass model for Z - RooRealVar mZ("mZ", "Z Mass", 91.2, lowRange, highRange); - RooRealVar sigmaZ("sigmaZ", "Width of Gaussian", 2,0,10,"GeV"); - RooGaussian mZModel("mZModel", "Z+jets Model", invMass, mZ, sigmaZ); - // we know Z mass - mZ.setConstant(); - // we leave the width of the Z free during the fit in this example. - - // isolation model for Z. Only used to generate toy MC. - // the exponential is of the form exp(c*x). If we want - // the isolation to decay an e-fold every R GeV, we use - // c = -1/R. - RooConstVar zIsolDecayConst("zIsolDecayConst", + // Make models for signal (Higgs) and background (Z+jets and QCD) + // In real life, this part requires an intelligent modeling + // of signal and background -- this is only an example. 
+ + // set range of observable + Double_t lowRange = 00, highRange = 200; + + // make a RooRealVar for the observables + RooRealVar invMass("invMass", "M_{inv}", lowRange, highRange,"GeV"); + RooRealVar isolation("isolation", "isolation", 0., 20., "GeV"); + + + ///////////////////////////////////////////// + // make 2-d model for Z including the invariant mass + // distribution and an isolation distribution which we want to + // unfold from QCD. + std::cout << "make z model" << std::endl; + // mass model for Z + RooRealVar mZ("mZ", "Z Mass", 91.2, lowRange, highRange); + RooRealVar sigmaZ("sigmaZ", "Width of Gaussian", 2,0,10,"GeV"); + RooGaussian mZModel("mZModel", "Z+jets Model", invMass, mZ, sigmaZ); + // we know Z mass + mZ.setConstant(); + // we leave the width of the Z free during the fit in this example. + + // isolation model for Z. Only used to generate toy MC. + // the exponential is of the form exp(c*x). If we want + // the isolation to decay an e-fold every R GeV, we use + // c = -1/R. + RooConstVar zIsolDecayConst("zIsolDecayConst", "z isolation decay constant", -1); - RooExponential zIsolationModel("zIsolationModel", "z isolation model", + RooExponential zIsolationModel("zIsolationModel", "z isolation model", isolation, zIsolDecayConst); - // make the combined Z model - RooProdPdf zModel("zModel", "4-d model for Z", - RooArgSet(mZModel, zIsolationModel)); - - ////////////////////////////////////////////// - // make QCD model - - std::cout << "make qcd model" << std::endl; - // mass model for QCD. - // the exponential is of the form exp(c*x). If we want - // the mass to decay an e-fold every R GeV, we use - // c = -1/R. - // We can leave this parameter free during the fit. 
- RooRealVar qcdMassDecayConst("qcdMassDecayConst", - "Decay const for QCD mass spectrum", - -0.01, -100, 100,"1/GeV"); - RooExponential qcdMassModel("qcdMassModel", "qcd Mass Model", + // make the combined Z model + RooProdPdf zModel("zModel", "4-d model for Z", + RooArgSet(mZModel, zIsolationModel)); + + ////////////////////////////////////////////// + // make QCD model + + std::cout << "make qcd model" << std::endl; + // mass model for QCD. + // the exponential is of the form exp(c*x). If we want + // the mass to decay an e-fold every R GeV, we use + // c = -1/R. + // We can leave this parameter free during the fit. + RooRealVar qcdMassDecayConst("qcdMassDecayConst", + "Decay const for QCD mass spectrum", + -0.01, -100, 100,"1/GeV"); + RooExponential qcdMassModel("qcdMassModel", "qcd Mass Model", invMass, qcdMassDecayConst); - // isolation model for QCD. Only used to generate toy MC - // the exponential is of the form exp(c*x). If we want - // the isolation to decay an e-fold every R GeV, we use - // c = -1/R. - RooConstVar qcdIsolDecayConst("qcdIsolDecayConst", - "Et resolution constant", -.1); - RooExponential qcdIsolationModel("qcdIsolationModel", "QCD isolation model", - isolation, qcdIsolDecayConst); + // isolation model for QCD. Only used to generate toy MC + // the exponential is of the form exp(c*x). If we want + // the isolation to decay an e-fold every R GeV, we use + // c = -1/R. 
+ RooConstVar qcdIsolDecayConst("qcdIsolDecayConst", + "Et resolution constant", -.1); + RooExponential qcdIsolationModel("qcdIsolationModel", "QCD isolation model", + isolation, qcdIsolDecayConst); - // make the 2-d model - RooProdPdf qcdModel("qcdModel", "2-d model for QCD", - RooArgSet(qcdMassModel, qcdIsolationModel)); + // make the 2-d model + RooProdPdf qcdModel("qcdModel", "2-d model for QCD", + RooArgSet(qcdMassModel, qcdIsolationModel)); - ////////////////////////////////////////////// - // combined model + ////////////////////////////////////////////// + // combined model - // These variables represent the number of Z or QCD events - // They will be fitted. - RooRealVar zYield("zYield","fitted yield for Z",50 ,0.,1000) ; - RooRealVar qcdYield("qcdYield","fitted yield for QCD", 100 ,0.,1000) ; + // These variables represent the number of Z or QCD events + // They will be fitted. + RooRealVar zYield("zYield","fitted yield for Z",50 ,0.,1000) ; + RooRealVar qcdYield("qcdYield","fitted yield for QCD", 100 ,0.,1000) ; - // now make the combined model - std::cout << "make full model" << std::endl; - RooAddPdf model("model","z+qcd background models", + // now make the combined model + std::cout << "make full model" << std::endl; + RooAddPdf model("model","z+qcd background models", RooArgList(zModel, qcdModel), RooArgList(zYield,qcdYield)); - // interesting for debugging and visualizing the model - model.graphVizTree("fullModel.dot"); + // interesting for debugging and visualizing the model + model.graphVizTree("fullModel.dot"); - std::cout << "import model" << std::endl; + std::cout << "import model" << std::endl; - ws->import(model); + ws->import(model); } //____________________________________ void AddData(RooWorkspace* ws){ - // Add a toy dataset + // Add a toy dataset - // how many events do we want? - Int_t nEvents = 1000; + // how many events do we want? 
+ Int_t nEvents = 1000; - // get what we need out of the workspace to make toy data - RooAbsPdf* model = ws->pdf("model"); - RooRealVar* invMass = ws->var("invMass"); - RooRealVar* isolation = ws->var("isolation"); + // get what we need out of the workspace to make toy data + RooAbsPdf* model = ws->pdf("model"); + RooRealVar* invMass = ws->var("invMass"); + RooRealVar* isolation = ws->var("isolation"); - // make the toy data - std::cout << "make data set and import to workspace" << std::endl; - RooDataSet* data = model->generate(RooArgSet(*invMass, *isolation),nEvents); + // make the toy data + std::cout << "make data set and import to workspace" << std::endl; + RooDataSet* data = model->generate(RooArgSet(*invMass, *isolation),nEvents); - // import data into workspace - ws->import(*data, Rename("data")); + // import data into workspace + ws->import(*data, Rename("data")); } //____________________________________ void DoSPlot(RooWorkspace* ws){ - std::cout << "Calculate sWeights" << std::endl; + std::cout << "Calculate sWeights" << std::endl; - // get what we need out of the workspace to do the fit - RooAbsPdf* model = ws->pdf("model"); - RooRealVar* zYield = ws->var("zYield"); - RooRealVar* qcdYield = ws->var("qcdYield"); - RooDataSet* data = (RooDataSet*) ws->data("data"); + // get what we need out of the workspace to do the fit + RooAbsPdf* model = ws->pdf("model"); + RooRealVar* zYield = ws->var("zYield"); + RooRealVar* qcdYield = ws->var("qcdYield"); + RooDataSet* data = (RooDataSet*) ws->data("data"); - // fit the model to the data. - model->fitTo(*data, Extended() ); + // fit the model to the data. + model->fitTo(*data, Extended() ); - // The sPlot technique requires that we fix the parameters - // of the model that are not yields after doing the fit. - // - // This *could* be done with the lines below, however this is taken care of - // by the RooStats::SPlot constructor (or more precisely the AddSWeight - // method). 
- // - //RooRealVar* sigmaZ = ws->var("sigmaZ"); - //RooRealVar* qcdMassDecayConst = ws->var("qcdMassDecayConst"); - //sigmaZ->setConstant(); - //qcdMassDecayConst->setConstant(); + // The sPlot technique requires that we fix the parameters + // of the model that are not yields after doing the fit. + // + // This *could* be done with the lines below, however this is taken care of + // by the RooStats::SPlot constructor (or more precisely the AddSWeight + // method). + // + //RooRealVar* sigmaZ = ws->var("sigmaZ"); + //RooRealVar* qcdMassDecayConst = ws->var("qcdMassDecayConst"); + //sigmaZ->setConstant(); + //qcdMassDecayConst->setConstant(); - RooMsgService::instance().setSilentMode(true); + RooMsgService::instance().setSilentMode(true); - // Now we use the SPlot class to add SWeights to our data set - // based on our model and our yield variables - RooStats::SPlot* sData = new RooStats::SPlot("sData","An SPlot", - *data, model, RooArgList(*zYield,*qcdYield) ); + // Now we use the SPlot class to add SWeights to our data set + // based on our model and our yield variables + RooStats::SPlot* sData = new RooStats::SPlot("sData","An SPlot", + *data, model, RooArgList(*zYield,*qcdYield) ); - // Check that our weights have the desired properties + // Check that our weights have the desired properties - std::cout << "Check SWeights:" << std::endl; + std::cout << "Check SWeights:" << std::endl; - std::cout << std::endl << "Yield of Z is " - << zYield->getVal() << ". From sWeights it is " - << sData->GetYieldFromSWeight("zYield") << std::endl; + std::cout << std::endl << "Yield of Z is " + << zYield->getVal() << ". From sWeights it is " + << sData->GetYieldFromSWeight("zYield") << std::endl; - std::cout << "Yield of QCD is " - << qcdYield->getVal() << ". From sWeights it is " - << sData->GetYieldFromSWeight("qcdYield") << std::endl - << std::endl; + std::cout << "Yield of QCD is " + << qcdYield->getVal() << ". 
From sWeights it is " + << sData->GetYieldFromSWeight("qcdYield") << std::endl + << std::endl; - for(Int_t i=0; i < 10; i++) - { + for(Int_t i=0; i < 10; i++) + { std::cout << "z Weight " << sData->GetSWeight(i,"zYield") - << " qcd Weight " << sData->GetSWeight(i,"qcdYield") - << " Total Weight " << sData->GetSumOfEventSWeight(i) - << std::endl; - } + << " qcd Weight " << sData->GetSWeight(i,"qcdYield") + << " Total Weight " << sData->GetSumOfEventSWeight(i) + << std::endl; + } - std::cout << std::endl; + std::cout << std::endl; - // import this new dataset with sWeights - std::cout << "import new dataset with sWeights" << std::endl; - ws->import(*data, Rename("dataWithSWeights")); + // import this new dataset with sWeights + std::cout << "import new dataset with sWeights" << std::endl; + ws->import(*data, Rename("dataWithSWeights")); } void MakePlots(RooWorkspace* ws){ - // Here we make plots of the discriminating variable (invMass) after the fit - // and of the control variable (isolation) after unfolding with sPlot. - std::cout << "make plots" << std::endl; + // Here we make plots of the discriminating variable (invMass) after the fit + // and of the control variable (isolation) after unfolding with sPlot. 
+ std::cout << "make plots" << std::endl; - // make our canvas - TCanvas* cdata = new TCanvas("sPlot","sPlot demo", 400, 600); - cdata->Divide(1,3); + // make our canvas + TCanvas* cdata = new TCanvas("sPlot","sPlot demo", 400, 600); + cdata->Divide(1,3); - // get what we need out of the workspace - RooAbsPdf* model = ws->pdf("model"); - RooAbsPdf* zModel = ws->pdf("zModel"); - RooAbsPdf* qcdModel = ws->pdf("qcdModel"); + // get what we need out of the workspace + RooAbsPdf* model = ws->pdf("model"); + RooAbsPdf* zModel = ws->pdf("zModel"); + RooAbsPdf* qcdModel = ws->pdf("qcdModel"); - RooRealVar* isolation = ws->var("isolation"); - RooRealVar* invMass = ws->var("invMass"); + RooRealVar* isolation = ws->var("isolation"); + RooRealVar* invMass = ws->var("invMass"); - // note, we get the dataset with sWeights - RooDataSet* data = (RooDataSet*) ws->data("dataWithSWeights"); + // note, we get the dataset with sWeights + RooDataSet* data = (RooDataSet*) ws->data("dataWithSWeights"); - // this shouldn't be necessary, need to fix something with workspace - // do this to set parameters back to their fitted values. - model->fitTo(*data, Extended() ); + // this shouldn't be necessary, need to fix something with workspace + // do this to set parameters back to their fitted values. 
+ model->fitTo(*data, Extended() ); - //plot invMass for data with full model and individual components overlaid - // TCanvas* cdata = new TCanvas(); - cdata->cd(1); - RooPlot* frame = invMass->frame() ; - data->plotOn(frame ) ; - model->plotOn(frame) ; - model->plotOn(frame,Components(*zModel),LineStyle(kDashed), LineColor(kRed)) ; - model->plotOn(frame,Components(*qcdModel),LineStyle(kDashed),LineColor(kGreen)) ; + //plot invMass for data with full model and individual components overlaid + // TCanvas* cdata = new TCanvas(); + cdata->cd(1); + RooPlot* frame = invMass->frame() ; + data->plotOn(frame ) ; + model->plotOn(frame) ; + model->plotOn(frame,Components(*zModel),LineStyle(kDashed), LineColor(kRed)) ; + model->plotOn(frame,Components(*qcdModel),LineStyle(kDashed),LineColor(kGreen)) ; - frame->SetTitle("Fit of model to discriminating variable"); - frame->Draw() ; + frame->SetTitle("Fit of model to discriminating variable"); + frame->Draw() ; - // Now use the sWeights to show isolation distribution for Z and QCD. - // The SPlot class can make this easier, but here we demonstrate in more - // detail how the sWeights are used. The SPlot class should make this - // very easy and needs some more development. + // Now use the sWeights to show isolation distribution for Z and QCD. + // The SPlot class can make this easier, but here we demonstrate in more + // detail how the sWeights are used. The SPlot class should make this + // very easy and needs some more development. - // Plot isolation for Z component. - // Do this by plotting all events weighted by the sWeight for the Z component. - // The SPlot class adds a new variable that has the name of the corresponding - // yield + "_sw". - cdata->cd(2); + // Plot isolation for Z component. + // Do this by plotting all events weighted by the sWeight for the Z component. + // The SPlot class adds a new variable that has the name of the corresponding + // yield + "_sw". 
+ cdata->cd(2); - // create weighted data set - RooDataSet * dataw_z = new RooDataSet(data->GetName(),data->GetTitle(),data,*data->get(),0,"zYield_sw") ; + // create weighted data set + RooDataSet * dataw_z = new RooDataSet(data->GetName(),data->GetTitle(),data,*data->get(),0,"zYield_sw") ; - RooPlot* frame2 = isolation->frame() ; - dataw_z->plotOn(frame2, DataError(RooAbsData::SumW2) ) ; + RooPlot* frame2 = isolation->frame() ; + dataw_z->plotOn(frame2, DataError(RooAbsData::SumW2) ) ; - frame2->SetTitle("isolation distribution for Z"); - frame2->Draw() ; + frame2->SetTitle("isolation distribution for Z"); + frame2->Draw() ; - // Plot isolation for QCD component. - // Eg. plot all events weighted by the sWeight for the QCD component. - // The SPlot class adds a new variable that has the name of the corresponding - // yield + "_sw". - cdata->cd(3); - RooDataSet * dataw_qcd = new RooDataSet(data->GetName(),data->GetTitle(),data,*data->get(),0,"qcdYield_sw") ; - RooPlot* frame3 = isolation->frame() ; - dataw_qcd->plotOn(frame3,DataError(RooAbsData::SumW2) ) ; + // Plot isolation for QCD component. + // Eg. plot all events weighted by the sWeight for the QCD component. + // The SPlot class adds a new variable that has the name of the corresponding + // yield + "_sw". 
+ cdata->cd(3); + RooDataSet * dataw_qcd = new RooDataSet(data->GetName(),data->GetTitle(),data,*data->get(),0,"qcdYield_sw") ; + RooPlot* frame3 = isolation->frame() ; + dataw_qcd->plotOn(frame3,DataError(RooAbsData::SumW2) ) ; - frame3->SetTitle("isolation distribution for QCD"); - frame3->Draw() ; + frame3->SetTitle("isolation distribution for QCD"); + frame3->Draw() ; - // cdata->SaveAs("SPlot.gif"); + // cdata->SaveAs("SPlot.gif"); } diff --git a/tutorials/roostats/rs401c_FeldmanCousins.C b/tutorials/roostats/rs401c_FeldmanCousins.C index e1d003fc3378f..56436b598a18a 100644 --- a/tutorials/roostats/rs401c_FeldmanCousins.C +++ b/tutorials/roostats/rs401c_FeldmanCousins.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook /// Produces an interval on the mean signal in a number counting /// experiment with known background using the Feldman-Cousins technique. /// @@ -42,92 +43,92 @@ #include // use this order for safety on library loading -using namespace RooFit ; -using namespace RooStats ; +using namespace RooFit; +using namespace RooStats; void rs401c_FeldmanCousins() { - // to time the macro... 
about 30 s - TStopwatch t; - t.Start(); - - // make a simple model - RooRealVar x("x","", 1,0,50); - RooRealVar mu("mu","", 2.5,0, 15); // with a limit on mu>=0 - RooConstVar b("b","", 3.); - RooAddition mean("mean","",RooArgList(mu,b)); - RooPoisson pois("pois", "", x, mean); - RooArgSet parameters(mu); - - // create a toy dataset - RooDataSet* data = pois.generate(RooArgSet(x), 1); - data->Print("v"); - - TCanvas* dataCanvas = new TCanvas("dataCanvas"); - RooPlot* frame = x.frame(); - data->plotOn(frame); - frame->Draw(); - dataCanvas->Update(); - - RooWorkspace* w = new RooWorkspace(); - ModelConfig modelConfig("poissonProblem",w); - modelConfig.SetPdf(pois); - modelConfig.SetParametersOfInterest(parameters); - modelConfig.SetObservables(RooArgSet(x)); - w->Print(); - - //////// show use of Feldman-Cousins - RooStats::FeldmanCousins fc(*data,modelConfig); - fc.SetTestSize(.05); // set size of test - fc.UseAdaptiveSampling(true); - fc.FluctuateNumDataEntries(false); // number counting analysis: dataset always has 1 entry with N events observed - fc.SetNBins(100); // number of points to test per parameter - - // use the Feldman-Cousins tool - PointSetInterval* interval = (PointSetInterval*)fc.GetInterval(); - - // make a canvas for plots - TCanvas* intervalCanvas = new TCanvas("intervalCanvas"); - - std::cout << "is this point in the interval? " << - interval->IsInInterval(parameters) << std::endl; - - std::cout << "interval is ["<< - interval->LowerLimit(mu) << ", " << - interval->UpperLimit(mu) << "]" << endl; - - // using 200 bins it takes 1 min and the answer is - // interval is [0.2625, 10.6125] with a step size of .075 - // The interval in Feldman & Cousins's original paper is [.29, 10.81] - // Phys.Rev.D57:3873-3889,1998. 
- - // No dedicated plotting class yet, so do it by hand: - - RooDataHist* parameterScan = (RooDataHist*) fc.GetPointsToScan(); - TH1F* hist = (TH1F*) parameterScan->createHistogram("mu",30); - hist->Draw(); - - - RooArgSet* tmpPoint; - // loop over points to test - for(Int_t i=0; inumEntries(); ++i){ - // cout << "on parameter point " << i << " out of " << parameterScan->numEntries() << endl; - // get a parameter point from the list of points to test. - tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); - - TMarker* mark = new TMarker(tmpPoint->getRealValue("mu"), 1, 25); - if (interval->IsInInterval( *tmpPoint ) ) - mark->SetMarkerColor(kBlue); - else - mark->SetMarkerColor(kRed); - - mark->Draw("s"); - //delete tmpPoint; - // delete mark; - } - t.Stop(); - t.Print(); + // to time the macro... about 30 s + TStopwatch t; + t.Start(); + + // make a simple model + RooRealVar x("x","", 1,0,50); + RooRealVar mu("mu","", 2.5,0, 15); // with a limit on mu>=0 + RooConstVar b("b","", 3.); + RooAddition mean("mean","",RooArgList(mu,b)); + RooPoisson pois("pois", "", x, mean); + RooArgSet parameters(mu); + + // create a toy dataset + RooDataSet* data = pois.generate(RooArgSet(x), 1); + data->Print("v"); + + TCanvas* dataCanvas = new TCanvas("dataCanvas"); + RooPlot* frame = x.frame(); + data->plotOn(frame); + frame->Draw(); + dataCanvas->Update(); + + RooWorkspace* w = new RooWorkspace(); + ModelConfig modelConfig("poissonProblem",w); + modelConfig.SetPdf(pois); + modelConfig.SetParametersOfInterest(parameters); + modelConfig.SetObservables(RooArgSet(x)); + w->Print(); + + //////// show use of Feldman-Cousins + RooStats::FeldmanCousins fc(*data,modelConfig); + fc.SetTestSize(.05); // set size of test + fc.UseAdaptiveSampling(true); + fc.FluctuateNumDataEntries(false); // number counting analysis: dataset always has 1 entry with N events observed + fc.SetNBins(100); // number of points to test per parameter + + // use the Feldman-Cousins tool + PointSetInterval* 
interval = (PointSetInterval*)fc.GetInterval(); + + // make a canvas for plots + TCanvas* intervalCanvas = new TCanvas("intervalCanvas"); + + std::cout << "is this point in the interval? " << + interval->IsInInterval(parameters) << std::endl; + + std::cout << "interval is ["<< + interval->LowerLimit(mu) << ", " << + interval->UpperLimit(mu) << "]" << endl; + + // using 200 bins it takes 1 min and the answer is + // interval is [0.2625, 10.6125] with a step size of .075 + // The interval in Feldman & Cousins's original paper is [.29, 10.81] + // Phys.Rev.D57:3873-3889,1998. + + // No dedicated plotting class yet, so do it by hand: + + RooDataHist* parameterScan = (RooDataHist*) fc.GetPointsToScan(); + TH1F* hist = (TH1F*) parameterScan->createHistogram("mu",30); + hist->Draw(); + + + RooArgSet* tmpPoint; + // loop over points to test + for(Int_t i=0; i<parameterScan->numEntries(); ++i){ + // cout << "on parameter point " << i << " out of " << parameterScan->numEntries() << endl; + // get a parameter point from the list of points to test. 
+ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); + + TMarker* mark = new TMarker(tmpPoint->getRealValue("mu"), 1, 25); + if (interval->IsInInterval( *tmpPoint ) ) + mark->SetMarkerColor(kBlue); + else + mark->SetMarkerColor(kRed); + + mark->Draw("s"); + //delete tmpPoint; + // delete mark; + } + t.Stop(); + t.Print(); } diff --git a/tutorials/roostats/rs401d_FeldmanCousins.C b/tutorials/roostats/rs401d_FeldmanCousins.C index 222efffa4c10e..0f4a244393b53 100644 --- a/tutorials/roostats/rs401d_FeldmanCousins.C +++ b/tutorials/roostats/rs401d_FeldmanCousins.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook /// 'Neutrino Oscillation Example from Feldman & Cousins' /// /// This tutorial shows a more complex example using the FeldmanCousins utility @@ -59,269 +60,269 @@ #endif // use this order for safety on library loading -using namespace RooFit ; +using namespace RooFit; using namespace RooStats ; void rs401d_FeldmanCousins(bool doFeldmanCousins=false, bool doMCMC = true) { - // to time the macro - TStopwatch t; - t.Start(); - - - /* - Taken from Feldman & Cousins paper, Phys.Rev.D57:3873-3889,1998. - e-Print: physics/9711021 (see page 13.) - - Quantum mechanics dictates that the probability of such a transformation is given by the formula - P (νµ → ν e ) = sin^2 (2θ) sin^2 (1.27 ∆m^2 L /E ) - where P is the probability for a νµ to transform into a νe , L is the distance in km between - the creation of the neutrino from meson decay and its interaction in the detector, E is the - neutrino energy in GeV, and ∆m^2 = |m^2− m^2 | in (eV/c^2 )^2 . - - To demonstrate how this works in practice, and how it compares to alternative approaches - that have been used, we consider a toy model of a typical neutrino oscillation experiment. - The toy model is defined by the following parameters: Mesons are assumed to decay to - neutrinos uniformly in a region 600 m to 1000 m from the detector. 
The expected background - from conventional νe interactions and misidentified νµ interactions is assumed to be 100 - events in each of 5 energy bins which span the region from 10 to 60 GeV. We assume that - the νµ flux is such that if P (νµ → ν e ) = 0.01 averaged over any bin, then that bin would - have an expected additional contribution of 100 events due to νµ → ν e oscillations. - */ - - // Make signal model model - RooRealVar E("E","", 15,10,60,"GeV"); - RooRealVar L("L","", .800,.600, 1.0,"km"); // need these units in formula - RooRealVar deltaMSq("deltaMSq","#Delta m^{2}",40,1,300,"eV/c^{2}"); - RooRealVar sinSq2theta("sinSq2theta","sin^{2}(2#theta)", .006,.0,.02); - //RooRealVar deltaMSq("deltaMSq","#Delta m^{2}",40,20,70,"eV/c^{2}"); - // RooRealVar sinSq2theta("sinSq2theta","sin^{2}(2#theta)", .006,.001,.01); - // PDF for oscillation only describes deltaMSq dependence, sinSq2theta goes into sigNorm - // 1) The code for this PDF was created by issuing these commands - // root [0] RooClassFactory x - // root [1] x.makePdf("NuMuToNuE_Oscillation","L,E,deltaMSq","","pow(sin(1.27*deltaMSq*L/E),2)") - NuMuToNuE_Oscillation PnmuTone("PnmuTone","P(#nu_{#mu} #rightarrow #nu_{e}",L,E,deltaMSq); - - // only E is observable, so create the signal model by integrating out L - RooAbsPdf* sigModel = PnmuTone.createProjection(L); - - // create \int dE' dL' P(E',L' | \Delta m^2). - // Given RooFit will renormalize the PDF in the range of the observables, - // the average probability to oscillate in the experiment's acceptance - // needs to be incorporated into the extended term in the likelihood. - // Do this by creating a RooAbsReal representing the integral and divide by - // the area in the E-L plane. - // The integral should be over "primed" observables, so we need - // an independent copy of PnmuTone not to interfere with the original. 
- - // Independent copy for Integral - RooRealVar EPrime("EPrime","", 15,10,60,"GeV"); - RooRealVar LPrime("LPrime","", .800,.600, 1.0,"km"); // need these units in formula - NuMuToNuE_Oscillation PnmuTonePrime("PnmuTonePrime","P(#nu_{#mu} #rightarrow #nu_{e}", - LPrime,EPrime,deltaMSq); - RooAbsReal* intProbToOscInExp = PnmuTonePrime.createIntegral(RooArgSet(EPrime,LPrime)); - - // Getting the flux is a bit tricky. It is more celear to include a cross section term that is not - // explicitly refered to in the text, eg. - // # events in bin = flux * cross-section for nu_e interaction in E bin * average prob nu_mu osc. to nu_e in bin - // let maxEventsInBin = flux * cross-section for nu_e interaction in E bin - // maxEventsInBin * 1% chance per bin = 100 events / bin - // therefore maxEventsInBin = 10,000. - // for 5 bins, this means maxEventsTot = 50,000 - RooConstVar maxEventsTot("maxEventsTot","maximum number of sinal events",50000); - RooConstVar inverseArea("inverseArea","1/(#Delta E #Delta L)", - 1./(EPrime.getMax()-EPrime.getMin())/(LPrime.getMax()-LPrime.getMin())); - - // sigNorm = maxEventsTot * (\int dE dL prob to oscillate in experiment / Area) * sin^2(2\theta) - RooProduct sigNorm("sigNorm", "", RooArgSet(maxEventsTot, *intProbToOscInExp, inverseArea, sinSq2theta)); - // bkg = 5 bins * 100 events / bin - RooConstVar bkgNorm("bkgNorm","normalization for background",500); - - // flat background (0th order polynomial, so no arguments for coefficients) - RooPolynomial bkgEShape("bkgEShape","flat bkg shape", E); - - // total model - RooAddPdf model("model","",RooArgList(*sigModel,bkgEShape), - RooArgList(sigNorm,bkgNorm)); - - // for debugging, check model tree - // model.printCompactTree(); - // model.graphVizTree("model.dot"); - - - // turn off some messages - RooMsgService::instance().setStreamStatus(0,kFALSE); - RooMsgService::instance().setStreamStatus(1,kFALSE); - RooMsgService::instance().setStreamStatus(2,kFALSE); - - - 
////////////////////////////////////////////// - // n events in data to data, simply sum of sig+bkg - Int_t nEventsData = bkgNorm.getVal()+sigNorm.getVal(); - cout << "generate toy data with nEvents = " << nEventsData << endl; - // adjust random seed to get a toy dataset similar to one in paper. - // Found by trial and error (3 trials, so not very "fine tuned") - RooRandom::randomGenerator()->SetSeed(3); - // create a toy dataset - RooDataSet* data = model.generate(RooArgSet(E), nEventsData); - - ///////////////////////////////////////////// - // make some plots - TCanvas* dataCanvas = new TCanvas("dataCanvas"); - dataCanvas->Divide(2,2); - - // plot the PDF - dataCanvas->cd(1); - TH1* hh = PnmuTone.createHistogram("hh",E,Binning(40),YVar(L,Binning(40)),Scaling(kFALSE)) ; - hh->SetLineColor(kBlue) ; - hh->SetTitle("True Signal Model"); - hh->Draw("surf"); - - // plot the data with the best fit - dataCanvas->cd(2); - RooPlot* Eframe = E.frame(); - data->plotOn(Eframe); - model.fitTo(*data, Extended()); - model.plotOn(Eframe); - model.plotOn(Eframe,Components(*sigModel),LineColor(kRed)); - model.plotOn(Eframe,Components(bkgEShape),LineColor(kGreen)); - model.plotOn(Eframe); - Eframe->SetTitle("toy data with best fit model (and sig+bkg components)"); - Eframe->Draw(); - - // plot the likelihood function - dataCanvas->cd(3); - RooNLLVar nll("nll", "nll", model, *data, Extended()); - RooProfileLL pll("pll", "", nll, RooArgSet(deltaMSq, sinSq2theta)); - // TH1* hhh = nll.createHistogram("hhh",sinSq2theta,Binning(40),YVar(deltaMSq,Binning(40))) ; - TH1* hhh = pll.createHistogram("hhh",sinSq2theta,Binning(40),YVar(deltaMSq,Binning(40)),Scaling(kFALSE)) ; - hhh->SetLineColor(kBlue) ; - hhh->SetTitle("Likelihood Function"); - hhh->Draw("surf"); - - dataCanvas->Update(); - - - - ////////////////////////////////////////////////////////// - //////// show use of Feldman-Cousins utility in RooStats - // set the distribution creator, which encodes the test statistic - RooArgSet 
parameters(deltaMSq, sinSq2theta); - RooWorkspace* w = new RooWorkspace(); - - ModelConfig modelConfig; - modelConfig.SetWorkspace(*w); - modelConfig.SetPdf(model); - modelConfig.SetParametersOfInterest(parameters); - - RooStats::FeldmanCousins fc(*data, modelConfig); - fc.SetTestSize(.1); // set size of test - fc.UseAdaptiveSampling(true); - fc.SetNBins(10); // number of points to test per parameter - - // use the Feldman-Cousins tool - ConfInterval* interval = 0; - if(doFeldmanCousins) - interval = fc.GetInterval(); - - - /////////////////////////////////////////////////////////////////// - ///////// show use of ProfileLikeihoodCalculator utility in RooStats - RooStats::ProfileLikelihoodCalculator plc(*data, modelConfig); - plc.SetTestSize(.1); - - ConfInterval* plcInterval = plc.GetInterval(); - - /////////////////////////////////////////////////////////////////// - ///////// show use of MCMCCalculator utility in RooStats - MCMCInterval* mcInt = NULL; - - if (doMCMC) { - // turn some messages back on - RooMsgService::instance().setStreamStatus(0,kTRUE); - RooMsgService::instance().setStreamStatus(1,kTRUE); - - TStopwatch mcmcWatch; - mcmcWatch.Start(); - - RooArgList axisList(deltaMSq, sinSq2theta); - MCMCCalculator mc(*data, modelConfig); - mc.SetNumIters(5000); - mc.SetNumBurnInSteps(100); - mc.SetUseKeys(true); - mc.SetTestSize(.1); - mc.SetAxes(axisList); // set which is x and y axis in posterior histogram - //mc.SetNumBins(50); - mcInt = (MCMCInterval*)mc.GetInterval(); - - mcmcWatch.Stop(); - mcmcWatch.Print(); - } - //////////////////////////////////////////// - // make plot of resulting interval - - dataCanvas->cd(4); - - // first plot a small dot for every point tested - if (doFeldmanCousins) { - RooDataHist* parameterScan = (RooDataHist*) fc.GetPointsToScan(); - TH2F* hist = (TH2F*) parameterScan->createHistogram("sinSq2theta:deltaMSq",30,30); - // hist->Draw(); - TH2F* forContour = (TH2F*)hist->Clone(); - - // now loop through the points and put a 
marker if it's in the interval - RooArgSet* tmpPoint; - // loop over points to test - for(Int_t i=0; inumEntries(); ++i){ - // get a parameter point from the list of points to test. - tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); - - if (interval){ - if (interval->IsInInterval( *tmpPoint ) ) { - forContour->SetBinContent( hist->FindBin(tmpPoint->getRealValue("sinSq2theta"), - tmpPoint->getRealValue("deltaMSq")), 1); - }else{ - forContour->SetBinContent( hist->FindBin(tmpPoint->getRealValue("sinSq2theta"), - tmpPoint->getRealValue("deltaMSq")), 0); - } - } - - - delete tmpPoint; - } - - if (interval){ - Double_t level=0.5; - forContour->SetContour(1,&level); - forContour->SetLineWidth(2); - forContour->SetLineColor(kRed); - forContour->Draw("cont2,same"); - } - } - - MCMCIntervalPlot* mcPlot = NULL; - if (mcInt) { - cout << "MCMC actual confidence level: " - << mcInt->GetActualConfidenceLevel() << endl; - mcPlot = new MCMCIntervalPlot(*mcInt); - mcPlot->SetLineColor(kMagenta); - mcPlot->Draw(); - } - dataCanvas->Update(); - - LikelihoodIntervalPlot plotInt((LikelihoodInterval*)plcInterval); - plotInt.SetTitle("90% Confidence Intervals"); - if (mcInt) - plotInt.Draw("same"); - else - plotInt.Draw(); - dataCanvas->Update(); - - /// print timing info - t.Stop(); - t.Print(); + // to time the macro + TStopwatch t; + t.Start(); + + + /* + Taken from Feldman & Cousins paper, Phys.Rev.D57:3873-3889,1998. + e-Print: physics/9711021 (see page 13.) + + Quantum mechanics dictates that the probability of such a transformation is given by the formula + P (νµ → ν e ) = sin^2 (2θ) sin^2 (1.27 ∆m^2 L /E ) + where P is the probability for a νµ to transform into a νe , L is the distance in km between + the creation of the neutrino from meson decay and its interaction in the detector, E is the + neutrino energy in GeV, and ∆m^2 = |m^2− m^2 | in (eV/c^2 )^2 . 
+ + To demonstrate how this works in practice, and how it compares to alternative approaches + that have been used, we consider a toy model of a typical neutrino oscillation experiment. + The toy model is defined by the following parameters: Mesons are assumed to decay to + neutrinos uniformly in a region 600 m to 1000 m from the detector. The expected background + from conventional νe interactions and misidentified νµ interactions is assumed to be 100 + events in each of 5 energy bins which span the region from 10 to 60 GeV. We assume that + the νµ flux is such that if P (νµ → ν e ) = 0.01 averaged over any bin, then that bin would + have an expected additional contribution of 100 events due to νµ → ν e oscillations. + */ + + // Make signal model model + RooRealVar E("E","", 15,10,60,"GeV"); + RooRealVar L("L","", .800,.600, 1.0,"km"); // need these units in formula + RooRealVar deltaMSq("deltaMSq","#Delta m^{2}",40,1,300,"eV/c^{2}"); + RooRealVar sinSq2theta("sinSq2theta","sin^{2}(2#theta)", .006,.0,.02); + //RooRealVar deltaMSq("deltaMSq","#Delta m^{2}",40,20,70,"eV/c^{2}"); + // RooRealVar sinSq2theta("sinSq2theta","sin^{2}(2#theta)", .006,.001,.01); + // PDF for oscillation only describes deltaMSq dependence, sinSq2theta goes into sigNorm + // 1) The code for this PDF was created by issuing these commands + // root [0] RooClassFactory x + // root [1] x.makePdf("NuMuToNuE_Oscillation","L,E,deltaMSq","","pow(sin(1.27*deltaMSq*L/E),2)") + NuMuToNuE_Oscillation PnmuTone("PnmuTone","P(#nu_{#mu} #rightarrow #nu_{e}",L,E,deltaMSq); + + // only E is observable, so create the signal model by integrating out L + RooAbsPdf* sigModel = PnmuTone.createProjection(L); + + // create \int dE' dL' P(E',L' | \Delta m^2). + // Given RooFit will renormalize the PDF in the range of the observables, + // the average probability to oscillate in the experiment's acceptance + // needs to be incorporated into the extended term in the likelihood. 
+ // Do this by creating a RooAbsReal representing the integral and divide by + // the area in the E-L plane. + // The integral should be over "primed" observables, so we need + // an independent copy of PnmuTone not to interfere with the original. + + // Independent copy for Integral + RooRealVar EPrime("EPrime","", 15,10,60,"GeV"); + RooRealVar LPrime("LPrime","", .800,.600, 1.0,"km"); // need these units in formula + NuMuToNuE_Oscillation PnmuTonePrime("PnmuTonePrime","P(#nu_{#mu} #rightarrow #nu_{e}", + LPrime,EPrime,deltaMSq); + RooAbsReal* intProbToOscInExp = PnmuTonePrime.createIntegral(RooArgSet(EPrime,LPrime)); + + // Getting the flux is a bit tricky. It is more clear to include a cross section term that is not + // explicitly referred to in the text, eg. + // # events in bin = flux * cross-section for nu_e interaction in E bin * average prob nu_mu osc. to nu_e in bin + // let maxEventsInBin = flux * cross-section for nu_e interaction in E bin + // maxEventsInBin * 1% chance per bin = 100 events / bin + // therefore maxEventsInBin = 10,000. 
+ // for 5 bins, this means maxEventsTot = 50,000 + RooConstVar maxEventsTot("maxEventsTot","maximum number of sinal events",50000); + RooConstVar inverseArea("inverseArea","1/(#Delta E #Delta L)", + 1./(EPrime.getMax()-EPrime.getMin())/(LPrime.getMax()-LPrime.getMin())); + + // sigNorm = maxEventsTot * (\int dE dL prob to oscillate in experiment / Area) * sin^2(2\theta) + RooProduct sigNorm("sigNorm", "", RooArgSet(maxEventsTot, *intProbToOscInExp, inverseArea, sinSq2theta)); + // bkg = 5 bins * 100 events / bin + RooConstVar bkgNorm("bkgNorm","normalization for background",500); + + // flat background (0th order polynomial, so no arguments for coefficients) + RooPolynomial bkgEShape("bkgEShape","flat bkg shape", E); + + // total model + RooAddPdf model("model","",RooArgList(*sigModel,bkgEShape), + RooArgList(sigNorm,bkgNorm)); + + // for debugging, check model tree + // model.printCompactTree(); + // model.graphVizTree("model.dot"); + + + // turn off some messages + RooMsgService::instance().setStreamStatus(0,kFALSE); + RooMsgService::instance().setStreamStatus(1,kFALSE); + RooMsgService::instance().setStreamStatus(2,kFALSE); + + + ////////////////////////////////////////////// + // n events in data to data, simply sum of sig+bkg + Int_t nEventsData = bkgNorm.getVal()+sigNorm.getVal(); + cout << "generate toy data with nEvents = " << nEventsData << endl; + // adjust random seed to get a toy dataset similar to one in paper. 
+ // Found by trial and error (3 trials, so not very "fine tuned") + RooRandom::randomGenerator()->SetSeed(3); + // create a toy dataset + RooDataSet* data = model.generate(RooArgSet(E), nEventsData); + + ///////////////////////////////////////////// + // make some plots + TCanvas* dataCanvas = new TCanvas("dataCanvas"); + dataCanvas->Divide(2,2); + + // plot the PDF + dataCanvas->cd(1); + TH1* hh = PnmuTone.createHistogram("hh",E,Binning(40),YVar(L,Binning(40)),Scaling(kFALSE)) ; + hh->SetLineColor(kBlue) ; + hh->SetTitle("True Signal Model"); + hh->Draw("surf"); + + // plot the data with the best fit + dataCanvas->cd(2); + RooPlot* Eframe = E.frame(); + data->plotOn(Eframe); + model.fitTo(*data, Extended()); + model.plotOn(Eframe); + model.plotOn(Eframe,Components(*sigModel),LineColor(kRed)); + model.plotOn(Eframe,Components(bkgEShape),LineColor(kGreen)); + model.plotOn(Eframe); + Eframe->SetTitle("toy data with best fit model (and sig+bkg components)"); + Eframe->Draw(); + + // plot the likelihood function + dataCanvas->cd(3); + RooNLLVar nll("nll", "nll", model, *data, Extended()); + RooProfileLL pll("pll", "", nll, RooArgSet(deltaMSq, sinSq2theta)); + // TH1* hhh = nll.createHistogram("hhh",sinSq2theta,Binning(40),YVar(deltaMSq,Binning(40))) ; + TH1* hhh = pll.createHistogram("hhh",sinSq2theta,Binning(40),YVar(deltaMSq,Binning(40)),Scaling(kFALSE)) ; + hhh->SetLineColor(kBlue) ; + hhh->SetTitle("Likelihood Function"); + hhh->Draw("surf"); + + dataCanvas->Update(); + + + + ////////////////////////////////////////////////////////// + //////// show use of Feldman-Cousins utility in RooStats + // set the distribution creator, which encodes the test statistic + RooArgSet parameters(deltaMSq, sinSq2theta); + RooWorkspace* w = new RooWorkspace(); + + ModelConfig modelConfig; + modelConfig.SetWorkspace(*w); + modelConfig.SetPdf(model); + modelConfig.SetParametersOfInterest(parameters); + + RooStats::FeldmanCousins fc(*data, modelConfig); + fc.SetTestSize(.1); // set 
size of test + fc.UseAdaptiveSampling(true); + fc.SetNBins(10); // number of points to test per parameter + + // use the Feldman-Cousins tool + ConfInterval* interval = 0; + if(doFeldmanCousins) + interval = fc.GetInterval(); + + + /////////////////////////////////////////////////////////////////// + ///////// show use of ProfileLikelihoodCalculator utility in RooStats + RooStats::ProfileLikelihoodCalculator plc(*data, modelConfig); + plc.SetTestSize(.1); + + ConfInterval* plcInterval = plc.GetInterval(); + + /////////////////////////////////////////////////////////////////// + ///////// show use of MCMCCalculator utility in RooStats + MCMCInterval* mcInt = NULL; + + if (doMCMC) { + // turn some messages back on + RooMsgService::instance().setStreamStatus(0,kTRUE); + RooMsgService::instance().setStreamStatus(1,kTRUE); + + TStopwatch mcmcWatch; + mcmcWatch.Start(); + + RooArgList axisList(deltaMSq, sinSq2theta); + MCMCCalculator mc(*data, modelConfig); + mc.SetNumIters(5000); + mc.SetNumBurnInSteps(100); + mc.SetUseKeys(true); + mc.SetTestSize(.1); + mc.SetAxes(axisList); // set which is x and y axis in posterior histogram + //mc.SetNumBins(50); + mcInt = (MCMCInterval*)mc.GetInterval(); + + mcmcWatch.Stop(); + mcmcWatch.Print(); + } + //////////////////////////////////////////// + // make plot of resulting interval + + dataCanvas->cd(4); + + // first plot a small dot for every point tested + if (doFeldmanCousins) { + RooDataHist* parameterScan = (RooDataHist*) fc.GetPointsToScan(); + TH2F* hist = (TH2F*) parameterScan->createHistogram("sinSq2theta:deltaMSq",30,30); + // hist->Draw(); + TH2F* forContour = (TH2F*)hist->Clone(); + + // now loop through the points and put a marker if it's in the interval + RooArgSet* tmpPoint; + // loop over points to test + for(Int_t i=0; i<parameterScan->numEntries(); ++i){ + // get a parameter point from the list of points to test. 
+ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); + + if (interval){ + if (interval->IsInInterval( *tmpPoint ) ) { + forContour->SetBinContent( hist->FindBin(tmpPoint->getRealValue("sinSq2theta"), + tmpPoint->getRealValue("deltaMSq")), 1); + }else{ + forContour->SetBinContent( hist->FindBin(tmpPoint->getRealValue("sinSq2theta"), + tmpPoint->getRealValue("deltaMSq")), 0); + } + } + + + delete tmpPoint; + } + + if (interval){ + Double_t level=0.5; + forContour->SetContour(1,&level); + forContour->SetLineWidth(2); + forContour->SetLineColor(kRed); + forContour->Draw("cont2,same"); + } + } + + MCMCIntervalPlot* mcPlot = NULL; + if (mcInt) { + cout << "MCMC actual confidence level: " + << mcInt->GetActualConfidenceLevel() << endl; + mcPlot = new MCMCIntervalPlot(*mcInt); + mcPlot->SetLineColor(kMagenta); + mcPlot->Draw(); + } + dataCanvas->Update(); + + LikelihoodIntervalPlot plotInt((LikelihoodInterval*)plcInterval); + plotInt.SetTitle("90% Confidence Intervals"); + if (mcInt) + plotInt.Draw("same"); + else + plotInt.Draw(); + dataCanvas->Update(); + + /// print timing info + t.Stop(); + t.Print(); } diff --git a/tutorials/roostats/rs601_HLFactoryexample.C b/tutorials/roostats/rs601_HLFactoryexample.C index 27e578a6c2809..fb43100db0fd7 100644 --- a/tutorials/roostats/rs601_HLFactoryexample.C +++ b/tutorials/roostats/rs601_HLFactoryexample.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// 'High Level Factory Example' RooStats tutorial macro #601 /// /// This tutorial shows an example of creating a simple @@ -24,49 +25,49 @@ // use this order for safety on library loading -using namespace RooFit ; -using namespace RooStats ; +using namespace RooFit; +using namespace RooStats; using namespace std; void rs601_HLFactoryexample() { - // --- Build the datacard and dump to file--- + // --- Build the datacard and dump to file--- - TString card_name("HLFavtoryexample.rs"); - ofstream ofile(card_name); - ofile << "// The simplest 
card\n\n" - << "gauss = Gaussian(mes[5.20,5.30],mean[5.28,5.2,5.3],width[0.0027,0.001,1]);\n" - << "argus = ArgusBG(mes,5.291,argpar[-20,-100,-1]);\n" - << "sum = SUM(nsig[200,0,10000]*gauss,nbkg[800,0,10000]*argus);\n\n"; + TString card_name("HLFavtoryexample.rs"); + ofstream ofile(card_name); + ofile << "// The simplest card\n\n" + << "gauss = Gaussian(mes[5.20,5.30],mean[5.28,5.2,5.3],width[0.0027,0.001,1]);\n" + << "argus = ArgusBG(mes,5.291,argpar[-20,-100,-1]);\n" + << "sum = SUM(nsig[200,0,10000]*gauss,nbkg[800,0,10000]*argus);\n\n"; - ofile.close(); + ofile.close(); - HLFactory hlf("HLFavtoryexample", + HLFactory hlf("HLFavtoryexample", card_name, false); - // --- Take elements out of the internal workspace --- + // --- Take elements out of the internal workspace --- - RooWorkspace* w = hlf.GetWs(); + RooWorkspace* w = hlf.GetWs(); - RooRealVar* mes = dynamic_cast(w->arg("mes")); - RooAbsPdf* sum = dynamic_cast(w->pdf("sum")); - RooAbsPdf* argus = dynamic_cast(w->pdf("argus")); -// RooRealVar* mean = dynamic_cast(w->arg("mean")); -// RooRealVar* argpar = dynamic_cast(w->arg("argpar")); + RooRealVar* mes = dynamic_cast(w->arg("mes")); + RooAbsPdf* sum = dynamic_cast(w->pdf("sum")); + RooAbsPdf* argus = dynamic_cast(w->pdf("argus")); +// RooRealVar* mean = dynamic_cast(w->arg("mean")); +// RooRealVar* argpar = dynamic_cast(w->arg("argpar")); - // --- Generate a toyMC sample from composite PDF --- - RooDataSet *data = sum->generate(*mes,2000) ; + // --- Generate a toyMC sample from composite PDF --- + RooDataSet *data = sum->generate(*mes,2000) ; - // --- Perform extended ML fit of composite PDF to toy data --- - sum->fitTo(*data) ; + // --- Perform extended ML fit of composite PDF to toy data --- + sum->fitTo(*data) ; - // --- Plot toy data and composite PDF overlaid --- - RooPlot* mesframe = mes->frame() ; - data->plotOn(mesframe) ; - sum->plotOn(mesframe) ; - sum->plotOn(mesframe,Components(*argus),LineStyle(kDashed)) ; + // --- Plot toy data and composite 
PDF overlaid --- + RooPlot* mesframe = mes->frame() ; + data->plotOn(mesframe) ; + sum->plotOn(mesframe) ; + sum->plotOn(mesframe,Components(*argus),LineStyle(kDashed)) ; - gROOT->SetStyle("Plain"); - mesframe->Draw() ; + gROOT->SetStyle("Plain"); + mesframe->Draw() ; } \ No newline at end of file diff --git a/tutorials/roostats/rs602_HLFactoryCombinationexample.C b/tutorials/roostats/rs602_HLFactoryCombinationexample.C index d55eb051af43d..476691130e908 100644 --- a/tutorials/roostats/rs602_HLFactoryCombinationexample.C +++ b/tutorials/roostats/rs602_HLFactoryCombinationexample.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook /// 'High Level Factory Example' RooStats tutorial macro #602 /// /// This tutorial shows an example of creating a combined @@ -23,55 +24,54 @@ #include "RooStats/HLFactory.h" - // use this order for safety on library loading -using namespace RooFit ; -using namespace RooStats ; +using namespace RooFit; +using namespace RooStats; using namespace std; void rs602_HLFactoryCombinationexample() { -using namespace RooStats; -using namespace RooFit; + using namespace RooStats; + using namespace RooFit; -// create a card -TString card_name("HLFavtoryCombinationexample.rs"); -ofstream ofile(card_name); -ofile << "// The simplest card for combination\n\n" - << "gauss1 = Gaussian(x[0,100],mean1[50,0,100],4);\n" - << "flat1 = Polynomial(x,0);\n" - << "sb_model1 = SUM(nsig1[120,0,300]*gauss1 , nbkg1[100,0,1000]*flat1);\n" - << "gauss2 = Gaussian(x,mean2[80,0,100],5);\n" - << "flat2 = Polynomial(x,0);\n" - << "sb_model2 = SUM(nsig2[90,0,400]*gauss2 , nbkg2[80,0,1000]*flat2);\n"; + // create a card + TString card_name("HLFavtoryCombinationexample.rs"); + ofstream ofile(card_name); + ofile << "// The simplest card for combination\n\n" + << "gauss1 = Gaussian(x[0,100],mean1[50,0,100],4);\n" + << "flat1 = Polynomial(x,0);\n" + << "sb_model1 = SUM(nsig1[120,0,300]*gauss1 , nbkg1[100,0,1000]*flat1);\n" + << "gauss2 = 
Gaussian(x,mean2[80,0,100],5);\n" + << "flat2 = Polynomial(x,0);\n" + << "sb_model2 = SUM(nsig2[90,0,400]*gauss2 , nbkg2[80,0,1000]*flat2);\n"; -ofile.close(); + ofile.close(); -HLFactory hlf("HLFavtoryCombinationexample", - card_name, - false); + HLFactory hlf("HLFavtoryCombinationexample", + card_name, + false); -hlf.AddChannel("model1","sb_model1","flat1"); -hlf.AddChannel("model2","sb_model2","flat2"); -RooAbsPdf* pdf=hlf.GetTotSigBkgPdf(); -RooCategory* thecat = hlf.GetTotCategory(); -RooRealVar* x= static_cast(hlf.GetWs()->arg("x")); + hlf.AddChannel("model1","sb_model1","flat1"); + hlf.AddChannel("model2","sb_model2","flat2"); + RooAbsPdf* pdf=hlf.GetTotSigBkgPdf(); + RooCategory* thecat = hlf.GetTotCategory(); + RooRealVar* x= static_cast(hlf.GetWs()->arg("x")); -RooDataSet* data = pdf->generate(RooArgSet(*x,*thecat),Extended()); + RooDataSet* data = pdf->generate(RooArgSet(*x,*thecat),Extended()); -// --- Perform extended ML fit of composite PDF to toy data --- -pdf->fitTo(*data) ; + // --- Perform extended ML fit of composite PDF to toy data --- + pdf->fitTo(*data) ; -// --- Plot toy data and composite PDF overlaid --- -RooPlot* xframe = x->frame() ; + // --- Plot toy data and composite PDF overlaid --- + RooPlot* xframe = x->frame() ; -data->plotOn(xframe); -thecat->setIndex(0); -pdf->plotOn(xframe,Slice(*thecat),ProjWData(*thecat,*data)) ; + data->plotOn(xframe); + thecat->setIndex(0); + pdf->plotOn(xframe,Slice(*thecat),ProjWData(*thecat,*data)) ; -thecat->setIndex(1); -pdf->plotOn(xframe,Slice(*thecat),ProjWData(*thecat,*data)) ; + thecat->setIndex(1); + pdf->plotOn(xframe,Slice(*thecat),ProjWData(*thecat,*data)) ; -gROOT->SetStyle("Plain"); -xframe->Draw(); + gROOT->SetStyle("Plain"); + xframe->Draw(); } diff --git a/tutorials/roostats/rs603_HLFactoryElaborateExample.C b/tutorials/roostats/rs603_HLFactoryElaborateExample.C index ba3482f266b10..2cf319d5ae0f8 100644 --- a/tutorials/roostats/rs603_HLFactoryElaborateExample.C +++ 
b/tutorials/roostats/rs603_HLFactoryElaborateExample.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// 'High Level Factory Example' RooStats tutorial macro #602 /// /// This tutorial shows an example of creating a combined @@ -25,114 +26,114 @@ // use this order for safety on library loading -using namespace RooFit ; -using namespace RooStats ; +using namespace RooFit; +using namespace RooStats; using namespace std; void rs603_HLFactoryElaborateExample() { - // --- Prepare the 2 needed datacards for this example --- - - TString card_name("rs603_card_WsMaker.rs"); - ofstream ofile(card_name); - ofile << "// The simplest card for combination\n\n"; - ofile << "gauss1 = Gaussian(x[0,100],mean1[50,0,100],4);\n"; - ofile << "flat1 = Polynomial(x,0);\n"; - ofile << "sb_model1 = SUM(nsig1[120,0,300]*gauss1 , nbkg1[100,0,1000]*flat1);\n\n"; - ofile << "echo In the middle!;\n\n"; - ofile << "gauss2 = Gaussian(x,mean2[80,0,100],5);\n"; - ofile << "flat2 = Polynomial(x,0);\n"; - ofile << "sb_model2 = SUM(nsig2[90,0,400]*gauss2 , nbkg2[80,0,1000]*flat2);\n\n"; - ofile << "echo At the end!;\n"; - ofile.close(); - - TString card_name2("rs603_card.rs"); - ofstream ofile2(card_name2); - ofile2 << "// The simplest card for combination\n\n"; - ofile2 << "gauss1 = Gaussian(x[0,100],mean1[50,0,100],4);\n"; - ofile2 << "flat1 = Polynomial(x,0);\n"; - ofile2 << "sb_model1 = SUM(nsig1[120,0,300]*gauss1 , nbkg1[100,0,1000]*flat1);\n\n"; - ofile2 << "echo In the middle!;\n\n"; - ofile2 << "gauss2 = Gaussian(x,mean2[80,0,100],5);\n"; - ofile2 << "flat2 = Polynomial(x,0);\n"; - ofile2 << "sb_model2 = SUM(nsig2[90,0,400]*gauss2 , nbkg2[80,0,1000]*flat2);\n\n"; - ofile2 << "#include rs603_included_card.rs;\n\n"; - ofile2 << "echo At the end!;\n"; - ofile2.close(); - - TString card_name3("rs603_included_card.rs"); - ofstream ofile3(card_name3); - ofile3 << "echo Now reading the included file!;\n\n"; - ofile3 << "echo Including datasets in a Workspace in a Root 
file...;\n"; - ofile3 << "data1 = import(rs603_infile.root,\n"; - ofile3 << " rs603_ws,\n"; - ofile3 << " data1);\n\n"; - ofile3 << "data2 = import(rs603_infile.root,\n"; - ofile3 << " rs603_ws,\n"; - ofile3 << " data2);\n"; - ofile3.close(); - -// --- Produce the two separate datasets into a WorkSpace --- - -HLFactory hlf("HLFactoryComplexExample", - "rs603_card_WsMaker.rs", - false); - -RooRealVar* x = static_cast(hlf.GetWs()->arg("x")); -RooAbsPdf* pdf1 = hlf.GetWs()->pdf("sb_model1"); -RooAbsPdf* pdf2 = hlf.GetWs()->pdf("sb_model2"); - -RooWorkspace w("rs603_ws"); - -RooDataSet* data1 = pdf1->generate(RooArgSet(*x),Extended()); -data1->SetName("data1"); -w.import(*data1); - -RooDataSet* data2 = pdf2->generate(RooArgSet(*x),Extended()); -data2->SetName("data2"); -w.import(*data2); - -// --- Write the WorkSpace into a rootfile --- - -TFile outfile("rs603_infile.root","RECREATE"); -w.Write(); -outfile.Close(); - -cout << "-------------------------------------------------------------------\n" - << " Rootfile and Workspace prepared \n" - << "-------------------------------------------------------------------\n"; - - -HLFactory hlf_2("HLFactoryElaborateExample", - "rs603_card.rs", - false); - -x = hlf_2.GetWs()->var("x"); -pdf1 = hlf_2.GetWs()->pdf("sb_model1"); -pdf2 = hlf_2.GetWs()->pdf("sb_model2"); - -hlf_2.AddChannel("model1","sb_model1","flat1","data1"); -hlf_2.AddChannel("model2","sb_model2","flat2","data2"); - -RooDataSet* data = hlf_2.GetTotDataSet(); -RooAbsPdf* pdf = hlf_2.GetTotSigBkgPdf(); -RooCategory* thecat = hlf_2.GetTotCategory(); - -// --- Perform extended ML fit of composite PDF to toy data --- -pdf->fitTo(*data) ; - -// --- Plot toy data and composite PDF overlaid --- -RooPlot* xframe = x->frame() ; - -data->plotOn(xframe); -thecat->setIndex(0); -pdf->plotOn(xframe,Slice(*thecat),ProjWData(*thecat,*data)) ; - -thecat->setIndex(1); -pdf->plotOn(xframe,Slice(*thecat),ProjWData(*thecat,*data)) ; - -gROOT->SetStyle("Plain"); - -xframe->Draw(); + // 
--- Prepare the 2 needed datacards for this example --- + + TString card_name("rs603_card_WsMaker.rs"); + ofstream ofile(card_name); + ofile << "// The simplest card for combination\n\n"; + ofile << "gauss1 = Gaussian(x[0,100],mean1[50,0,100],4);\n"; + ofile << "flat1 = Polynomial(x,0);\n"; + ofile << "sb_model1 = SUM(nsig1[120,0,300]*gauss1 , nbkg1[100,0,1000]*flat1);\n\n"; + ofile << "echo In the middle!;\n\n"; + ofile << "gauss2 = Gaussian(x,mean2[80,0,100],5);\n"; + ofile << "flat2 = Polynomial(x,0);\n"; + ofile << "sb_model2 = SUM(nsig2[90,0,400]*gauss2 , nbkg2[80,0,1000]*flat2);\n\n"; + ofile << "echo At the end!;\n"; + ofile.close(); + + TString card_name2("rs603_card.rs"); + ofstream ofile2(card_name2); + ofile2 << "// The simplest card for combination\n\n"; + ofile2 << "gauss1 = Gaussian(x[0,100],mean1[50,0,100],4);\n"; + ofile2 << "flat1 = Polynomial(x,0);\n"; + ofile2 << "sb_model1 = SUM(nsig1[120,0,300]*gauss1 , nbkg1[100,0,1000]*flat1);\n\n"; + ofile2 << "echo In the middle!;\n\n"; + ofile2 << "gauss2 = Gaussian(x,mean2[80,0,100],5);\n"; + ofile2 << "flat2 = Polynomial(x,0);\n"; + ofile2 << "sb_model2 = SUM(nsig2[90,0,400]*gauss2 , nbkg2[80,0,1000]*flat2);\n\n"; + ofile2 << "#include rs603_included_card.rs;\n\n"; + ofile2 << "echo At the end!;\n"; + ofile2.close(); + + TString card_name3("rs603_included_card.rs"); + ofstream ofile3(card_name3); + ofile3 << "echo Now reading the included file!;\n\n"; + ofile3 << "echo Including datasets in a Workspace in a Root file...;\n"; + ofile3 << "data1 = import(rs603_infile.root,\n"; + ofile3 << " rs603_ws,\n"; + ofile3 << " data1);\n\n"; + ofile3 << "data2 = import(rs603_infile.root,\n"; + ofile3 << " rs603_ws,\n"; + ofile3 << " data2);\n"; + ofile3.close(); + + // --- Produce the two separate datasets into a WorkSpace --- + + HLFactory hlf("HLFactoryComplexExample", + "rs603_card_WsMaker.rs", + false); + + RooRealVar* x = static_cast(hlf.GetWs()->arg("x")); + RooAbsPdf* pdf1 = hlf.GetWs()->pdf("sb_model1"); + 
RooAbsPdf* pdf2 = hlf.GetWs()->pdf("sb_model2"); + + RooWorkspace w("rs603_ws"); + + RooDataSet* data1 = pdf1->generate(RooArgSet(*x),Extended()); + data1->SetName("data1"); + w.import(*data1); + + RooDataSet* data2 = pdf2->generate(RooArgSet(*x),Extended()); + data2->SetName("data2"); + w.import(*data2); + + // --- Write the WorkSpace into a rootfile --- + + TFile outfile("rs603_infile.root","RECREATE"); + w.Write(); + outfile.Close(); + + cout << "-------------------------------------------------------------------\n" + << " Rootfile and Workspace prepared \n" + << "-------------------------------------------------------------------\n"; + + + HLFactory hlf_2("HLFactoryElaborateExample", + "rs603_card.rs", + false); + + x = hlf_2.GetWs()->var("x"); + pdf1 = hlf_2.GetWs()->pdf("sb_model1"); + pdf2 = hlf_2.GetWs()->pdf("sb_model2"); + + hlf_2.AddChannel("model1","sb_model1","flat1","data1"); + hlf_2.AddChannel("model2","sb_model2","flat2","data2"); + + RooDataSet* data = hlf_2.GetTotDataSet(); + RooAbsPdf* pdf = hlf_2.GetTotSigBkgPdf(); + RooCategory* thecat = hlf_2.GetTotCategory(); + + // --- Perform extended ML fit of composite PDF to toy data --- + pdf->fitTo(*data) ; + + // --- Plot toy data and composite PDF overlaid --- + RooPlot* xframe = x->frame() ; + + data->plotOn(xframe); + thecat->setIndex(0); + pdf->plotOn(xframe,Slice(*thecat),ProjWData(*thecat,*data)) ; + + thecat->setIndex(1); + pdf->plotOn(xframe,Slice(*thecat),ProjWData(*thecat,*data)) ; + + gROOT->SetStyle("Plain"); + + xframe->Draw(); } diff --git a/tutorials/roostats/rs701_BayesianCalculator.C b/tutorials/roostats/rs701_BayesianCalculator.C index 39455f949c619..1377685876e25 100644 --- a/tutorials/roostats/rs701_BayesianCalculator.C +++ b/tutorials/roostats/rs701_BayesianCalculator.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook /// 'Bayesian Calculator' RooStats tutorial macro #701 /// /// This tutorial shows an example of using the BayesianCalculator class diff --git 
a/tutorials/roostats/rs801_HypoTestInverterOriginal.C b/tutorials/roostats/rs801_HypoTestInverterOriginal.C index 3a3064c96d22e..190bf9aa9a4eb 100644 --- a/tutorials/roostats/rs801_HypoTestInverterOriginal.C +++ b/tutorials/roostats/rs801_HypoTestInverterOriginal.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// 'Hypothesis Test Inversion' RooStats tutorial macro #801 /// /// This tutorial shows an example of using the HypoTestInverterOriginal class @@ -32,48 +33,48 @@ using namespace RooStats; void rs801_HypoTestInverterOriginal() { - // prepare the model - RooRealVar lumi("lumi","luminosity",1); - RooRealVar r("r","cross-section ratio",3.74,0,50); - RooFormulaVar ns("ns","1*r*lumi",RooArgList(lumi,r)); - RooRealVar nb("nb","background yield",1); - RooRealVar x("x","dummy observable",0,1); - RooConstVar p0(RooFit::RooConst(0)); - RooPolynomial flatPdf("flatPdf","flat PDF",x,p0); - RooAddPdf totPdf("totPdf","S+B model",RooArgList(flatPdf,flatPdf),RooArgList(ns,nb)); - RooExtendPdf bkgPdf("bkgPdf","B-only model",flatPdf,nb); - RooDataSet* data = totPdf.generate(x,1); + // prepare the model + RooRealVar lumi("lumi","luminosity",1); + RooRealVar r("r","cross-section ratio",3.74,0,50); + RooFormulaVar ns("ns","1*r*lumi",RooArgList(lumi,r)); + RooRealVar nb("nb","background yield",1); + RooRealVar x("x","dummy observable",0,1); + RooConstVar p0(RooFit::RooConst(0)); + RooPolynomial flatPdf("flatPdf","flat PDF",x,p0); + RooAddPdf totPdf("totPdf","S+B model",RooArgList(flatPdf,flatPdf),RooArgList(ns,nb)); + RooExtendPdf bkgPdf("bkgPdf","B-only model",flatPdf,nb); + RooDataSet* data = totPdf.generate(x,1); - // prepare the calculator - HybridCalculatorOriginal myhc(*data, totPdf, bkgPdf,0,0); - myhc.SetTestStatistic(2); - myhc.SetNumberOfToys(1000); - myhc.UseNuisance(false); + // prepare the calculator + HybridCalculatorOriginal myhc(*data, totPdf, bkgPdf,0,0); + myhc.SetTestStatistic(2); + myhc.SetNumberOfToys(1000); + 
myhc.UseNuisance(false); - // run the hypothesis-test invertion - HypoTestInverterOriginal myInverter(myhc,r); - myInverter.SetTestSize(0.10); - myInverter.UseCLs(true); - // myInverter.RunFixedScan(5,1,6); - // scan for a 95% UL - myInverter.RunAutoScan(3.,5,myInverter.Size()/2,0.005); - // run an alternative autoscan algorithm - // myInverter.RunAutoScan(1,6,myInverter.Size()/2,0.005,1); - //myInverter.RunOnePoint(3.9); + // run the hypothesis-test invertion + HypoTestInverterOriginal myInverter(myhc,r); + myInverter.SetTestSize(0.10); + myInverter.UseCLs(true); + // myInverter.RunFixedScan(5,1,6); + // scan for a 95% UL + myInverter.RunAutoScan(3.,5,myInverter.Size()/2,0.005); + // run an alternative autoscan algorithm + // myInverter.RunAutoScan(1,6,myInverter.Size()/2,0.005,1); + //myInverter.RunOnePoint(3.9); - HypoTestInverterResult* results = myInverter.GetInterval(); + HypoTestInverterResult* results = myInverter.GetInterval(); - HypoTestInverterPlot myInverterPlot("myInverterPlot","",results); - TGraphErrors* gr1 = myInverterPlot.MakePlot(); - gr1->Draw("ALP"); + HypoTestInverterPlot myInverterPlot("myInverterPlot","",results); + TGraphErrors* gr1 = myInverterPlot.MakePlot(); + gr1->Draw("ALP"); - double ulError = results->UpperLimitEstimatedError(); + double ulError = results->UpperLimitEstimatedError(); - double upperLimit = results->UpperLimit(); - std::cout << "The computed upper limit is: " << upperLimit << std::endl; - std::cout << "an estimated error on this upper limit is: " << ulError << std::endl; - // expected result: 4.10 + double upperLimit = results->UpperLimit(); + std::cout << "The computed upper limit is: " << upperLimit << std::endl; + std::cout << "an estimated error on this upper limit is: " << ulError << std::endl; + // expected result: 4.10 } int main() { rs801_HypoTestInverterOriginal(); diff --git a/tutorials/roostats/rs_bernsteinCorrection.C b/tutorials/roostats/rs_bernsteinCorrection.C index 8c4c24473e70b..5c7a994736917 100644 
--- a/tutorials/roostats/rs_bernsteinCorrection.C +++ b/tutorials/roostats/rs_bernsteinCorrection.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// 'Bernstein Correction' RooStats tutorial macro /// /// This tutorial shows usage of a the BernsteinCorrection utility in RooStats. @@ -25,9 +26,6 @@ /// /// \author Kyle Cranmer -#ifndef __CINT__ -#include "RooGlobalFunc.h" -#endif #include "RooDataSet.h" #include "RooRealVar.h" #include "RooConstVar.h" @@ -61,96 +59,96 @@ using namespace RooStats; //____________________________________ void rs_bernsteinCorrection(){ - // set range of observable - Double_t lowRange = -1, highRange =5; - - // make a RooRealVar for the observable - RooRealVar x("x", "x", lowRange, highRange); - - // true model - RooGaussian narrow("narrow","",x,RooConst(0.), RooConst(.8)); - RooGaussian wide("wide","",x,RooConst(0.), RooConst(2.)); - RooAddPdf reality("reality","",RooArgList(narrow, wide), RooConst(0.8)); - - RooDataSet* data = reality.generate(x,1000); - - // nominal model - RooRealVar sigma("sigma","",1.,0,10); - RooGaussian nominal("nominal","",x,RooConst(0.), sigma); - - RooWorkspace* wks = new RooWorkspace("myWorksspace"); - - wks->import(*data, Rename("data")); - wks->import(nominal); - - // use Minuit2 - ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2"); - - // The tolerance sets the probability to add an unnecessary term. - // lower tolerance will add fewer terms, while higher tolerance - // will add more terms and provide a more flexible function. - Double_t tolerance = 0.05; - BernsteinCorrection bernsteinCorrection(tolerance); - Int_t degree = bernsteinCorrection.ImportCorrectedPdf(wks,"nominal","x","data"); - - if (degree < 0) { - Error("rs_bernsteinCorrection","Bernstein correction failed ! 
"); - return; - } - - cout << " Correction based on Bernstein Poly of degree " << degree << endl; - - - RooPlot* frame = x.frame(); - data->plotOn(frame); - // plot the best fit nominal model in blue - TString minimType = ROOT::Math::MinimizerOptions::DefaultMinimizerType(); - nominal.fitTo(*data,PrintLevel(0),Minimizer(minimType)); - nominal.plotOn(frame); - - // plot the best fit corrected model in red - RooAbsPdf* corrected = wks->pdf("corrected"); - if (!corrected) return; - - // fit corrected model - corrected->fitTo(*data,PrintLevel(0),Minimizer(minimType) ); - corrected->plotOn(frame,LineColor(kRed)); - - // plot the correction term (* norm constant) in dashed green - // should make norm constant just be 1, not depend on binning of data - RooAbsPdf* poly = wks->pdf("poly"); - if (poly) - poly->plotOn(frame,LineColor(kGreen), LineStyle(kDashed)); - - // this is a switch to check the sampling distribution - // of -2 log LR for two comparisons: - // the first is for n-1 vs. n degree polynomial corrections - // the second is for n vs. n+1 degree polynomial corrections - // Here we choose n to be the one chosen by the tolerance - // critereon above, eg. n = "degree" in the code. - // Setting this to true is takes about 10 min. 
- bool checkSamplingDist = true; - int numToyMC = 20; // increse this value for sensible results - - TCanvas* c1 = new TCanvas(); - if(checkSamplingDist) { - c1->Divide(1,2); - c1->cd(1); - } - frame->Draw(); - gPad->Update(); - - if(checkSamplingDist) { - // check sampling dist - ROOT::Math::MinimizerOptions::SetDefaultPrintLevel(-1); - TH1F* samplingDist = new TH1F("samplingDist","",20,0,10); - TH1F* samplingDistExtra = new TH1F("samplingDistExtra","",20,0,10); - bernsteinCorrection.CreateQSamplingDist(wks,"nominal","x","data",samplingDist, samplingDistExtra, degree,numToyMC); - - c1->cd(2); - samplingDistExtra->SetLineColor(kRed); - samplingDistExtra->Draw(); - samplingDist->Draw("same"); - } + // set range of observable + Double_t lowRange = -1, highRange =5; + + // make a RooRealVar for the observable + RooRealVar x("x", "x", lowRange, highRange); + + // true model + RooGaussian narrow("narrow","",x,RooConst(0.), RooConst(.8)); + RooGaussian wide("wide","",x,RooConst(0.), RooConst(2.)); + RooAddPdf reality("reality","",RooArgList(narrow, wide), RooConst(0.8)); + + RooDataSet* data = reality.generate(x,1000); + + // nominal model + RooRealVar sigma("sigma","",1.,0,10); + RooGaussian nominal("nominal","",x,RooConst(0.), sigma); + + RooWorkspace* wks = new RooWorkspace("myWorksspace"); + + wks->import(*data, Rename("data")); + wks->import(nominal); + + // use Minuit2 + ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2"); + + // The tolerance sets the probability to add an unnecessary term. + // lower tolerance will add fewer terms, while higher tolerance + // will add more terms and provide a more flexible function. + Double_t tolerance = 0.05; + BernsteinCorrection bernsteinCorrection(tolerance); + Int_t degree = bernsteinCorrection.ImportCorrectedPdf(wks,"nominal","x","data"); + + if (degree < 0) { + Error("rs_bernsteinCorrection","Bernstein correction failed ! 
"); + return; + } + + cout << " Correction based on Bernstein Poly of degree " << degree << endl; + + + RooPlot* frame = x.frame(); + data->plotOn(frame); + // plot the best fit nominal model in blue + TString minimType = ROOT::Math::MinimizerOptions::DefaultMinimizerType(); + nominal.fitTo(*data,PrintLevel(0),Minimizer(minimType)); + nominal.plotOn(frame); + + // plot the best fit corrected model in red + RooAbsPdf* corrected = wks->pdf("corrected"); + if (!corrected) return; + + // fit corrected model + corrected->fitTo(*data,PrintLevel(0),Minimizer(minimType) ); + corrected->plotOn(frame,LineColor(kRed)); + + // plot the correction term (* norm constant) in dashed green + // should make norm constant just be 1, not depend on binning of data + RooAbsPdf* poly = wks->pdf("poly"); + if (poly) + poly->plotOn(frame,LineColor(kGreen), LineStyle(kDashed)); + + // this is a switch to check the sampling distribution + // of -2 log LR for two comparisons: + // the first is for n-1 vs. n degree polynomial corrections + // the second is for n vs. n+1 degree polynomial corrections + // Here we choose n to be the one chosen by the tolerance + // critereon above, eg. n = "degree" in the code. + // Setting this to true is takes about 10 min. 
+ bool checkSamplingDist = true; + int numToyMC = 20; // increse this value for sensible results + + TCanvas* c1 = new TCanvas(); + if(checkSamplingDist) { + c1->Divide(1,2); + c1->cd(1); + } + frame->Draw(); + gPad->Update(); + + if(checkSamplingDist) { + // check sampling dist + ROOT::Math::MinimizerOptions::SetDefaultPrintLevel(-1); + TH1F* samplingDist = new TH1F("samplingDist","",20,0,10); + TH1F* samplingDistExtra = new TH1F("samplingDistExtra","",20,0,10); + bernsteinCorrection.CreateQSamplingDist(wks,"nominal","x","data",samplingDist, samplingDistExtra, degree,numToyMC); + + c1->cd(2); + samplingDistExtra->SetLineColor(kRed); + samplingDistExtra->Draw(); + samplingDist->Draw("same"); + } } diff --git a/tutorials/roostats/rs_numberCountingCombination.C b/tutorials/roostats/rs_numberCountingCombination.C index ab2c5a178f235..42912f02bbea8 100644 --- a/tutorials/roostats/rs_numberCountingCombination.C +++ b/tutorials/roostats/rs_numberCountingCombination.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -js /// 'Number Counting Example' RooStats tutorial macro #100 /// /// This tutorial shows an example of a combination of @@ -26,9 +27,7 @@ /// /// \author Kyle Cranmer -#ifndef __CINT__ -#include "RooGlobalFunc.h" -#endif + #include "RooStats/ProfileLikelihoodCalculator.h" #include "RooStats/NumberCountingPdfFactory.h" #include "RooStats/ConfInterval.h" @@ -37,8 +36,8 @@ #include "RooRealVar.h" // use this order for safety on library loading -using namespace RooFit ; -using namespace RooStats ; +using namespace RooFit; +using namespace RooStats; // declare three variations on the same tutorial @@ -50,178 +49,178 @@ void rs_numberCountingCombination_observedWithTau(); // main driver to choose one void rs_numberCountingCombination(int flag=1) { - if(flag==1) - rs_numberCountingCombination_expected(); - if(flag==2) - rs_numberCountingCombination_observed(); - if(flag==3) - rs_numberCountingCombination_observedWithTau(); + if(flag==1) + 
rs_numberCountingCombination_expected(); + if(flag==2) + rs_numberCountingCombination_observed(); + if(flag==3) + rs_numberCountingCombination_observedWithTau(); } ///////////////////////////////////////////// void rs_numberCountingCombination_expected() { - ///////////////////////////////////////// - // An example of a number counting combination with two channels. - // We consider both hypothesis testing and the equivalent confidence interval. - ///////////////////////////////////////// - - - ///////////////////////////////////////// - // The Model building stage - ///////////////////////////////////////// - - // Step 1, define arrays with signal & bkg expectations and background uncertainties - Double_t s[2] = {20.,10.}; // expected signal - Double_t b[2] = {100.,100.}; // expected background - Double_t db[2] = {.0100,.0100}; // fractional background uncertainty - - - // Step 2, use a RooStats factory to build a PDF for a - // number counting combination and add it to the workspace. - // We need to give the signal expectation to relate the masterSignal - // to the signal contribution in the individual channels. - // The model neglects correlations in background uncertainty, - // but they could be added without much change to the example. - NumberCountingPdfFactory f; - RooWorkspace* wspace = new RooWorkspace(); - f.AddModel(s,2,wspace,"TopLevelPdf", "masterSignal"); - - // Step 3, use a RooStats factory to add datasets to the workspace. - // Step 3a. - // Add the expected data to the workspace - f.AddExpData(s, b, db, 2, wspace, "ExpectedNumberCountingData"); - - // see below for a printout of the workspace - // wspace->Print(); //uncomment to see structure of workspace - - ///////////////////////////////////////// - // The Hypothesis testing stage: - ///////////////////////////////////////// - // Step 4, Define the null hypothesis for the calculator - // Here you need to know the name of the variables corresponding to hypothesis. 
- RooRealVar* mu = wspace->var("masterSignal"); - RooArgSet* poi = new RooArgSet(*mu); - RooArgSet* nullParams = new RooArgSet("nullParams"); - nullParams->addClone(*mu); - // here we explicitly set the value of the parameters for the null - nullParams->setRealValue("masterSignal",0); - - // Step 5, Create a calculator for doing the hypothesis test. - // because this is a - ProfileLikelihoodCalculator plc( *wspace->data("ExpectedNumberCountingData"), - *wspace->pdf("TopLevelPdf"), *poi, 0.05, nullParams); - - - // Step 6, Use the Calculator to get a HypoTestResult - HypoTestResult* htr = plc.GetHypoTest(); - assert(htr != 0); - cout << "-------------------------------------------------" << endl; - cout << "The p-value for the null is " << htr->NullPValue() << endl; - cout << "Corresponding to a signifcance of " << htr->Significance() << endl; - cout << "-------------------------------------------------\n\n" << endl; - - /* expected case should return: - ------------------------------------------------- - The p-value for the null is 0.015294 - Corresponding to a signifcance of 2.16239 - ------------------------------------------------- - */ - - ////////////////////////////////////////// - // Confidence Interval Stage - - // Step 8, Here we re-use the ProfileLikelihoodCalculator to return a confidence interval. - // We need to specify what are our parameters of interest - RooArgSet* paramsOfInterest = nullParams; // they are the same as before in this case - plc.SetParameters(*paramsOfInterest); - LikelihoodInterval* lrint = (LikelihoodInterval*) plc.GetInterval(); // that was easy. - lrint->SetConfidenceLevel(0.95); - - // Step 9, make a plot of the likelihood ratio and the interval obtained - //paramsOfInterest->setRealValue("masterSignal",1.); - // find limits - double lower = lrint->LowerLimit(*mu); - double upper = lrint->UpperLimit(*mu); - - LikelihoodIntervalPlot lrPlot(lrint); - lrPlot.SetMaximum(3.); - lrPlot.Draw(); - - // Step 10a. 
Get upper and lower limits - cout << "lower limit on master signal = " << lower << endl; - cout << "upper limit on master signal = " << upper << endl; - - - // Step 10b, Ask if masterSignal=0 is in the interval. - // Note, this is equivalent to the question of a 2-sigma hypothesis test: - // "is the parameter point masterSignal=0 inside the 95% confidence interval?" - // Since the signficance of the Hypothesis test was > 2-sigma it should not be: - // eg. we exclude masterSignal=0 at 95% confidence. - paramsOfInterest->setRealValue("masterSignal",0.); - cout << "-------------------------------------------------" << endl; - std::cout << "Consider this parameter point:" << std::endl; - paramsOfInterest->first()->Print(); - if( lrint->IsInInterval(*paramsOfInterest) ) - std::cout << "It IS in the interval." << std::endl; - else - std::cout << "It is NOT in the interval." << std::endl; - cout << "-------------------------------------------------\n\n" << endl; - - // Step 10c, We also ask about the parameter point masterSignal=2, which is inside the interval. - paramsOfInterest->setRealValue("masterSignal",2.); - cout << "-------------------------------------------------" << endl; - std::cout << "Consider this parameter point:" << std::endl; - paramsOfInterest->first()->Print(); - if( lrint->IsInInterval(*paramsOfInterest) ) - std::cout << "It IS in the interval." << std::endl; - else - std::cout << "It is NOT in the interval." 
<< std::endl; - cout << "-------------------------------------------------\n\n" << endl; - - - delete lrint; - delete htr; - delete wspace; - delete poi; - delete nullParams; - - - - /* - // Here's an example of what is in the workspace - // wspace->Print(); - RooWorkspace(NumberCountingWS) Number Counting WS contents - - variables - --------- - (x_0,masterSignal,expected_s_0,b_0,y_0,tau_0,x_1,expected_s_1,b_1,y_1,tau_1) - - p.d.f.s - ------- - RooProdPdf::joint[ pdfs=(sigRegion_0,sideband_0,sigRegion_1,sideband_1) ] = 2.20148e-08 - RooPoisson::sigRegion_0[ x=x_0 mean=splusb_0 ] = 0.036393 - RooPoisson::sideband_0[ x=y_0 mean=bTau_0 ] = 0.00398939 - RooPoisson::sigRegion_1[ x=x_1 mean=splusb_1 ] = 0.0380088 - RooPoisson::sideband_1[ x=y_1 mean=bTau_1 ] = 0.00398939 - - functions - -------- - RooAddition::splusb_0[ set1=(s_0,b_0) set2=() ] = 120 - RooProduct::s_0[ compRSet=(masterSignal,expected_s_0) compCSet=() ] = 20 - RooProduct::bTau_0[ compRSet=(b_0,tau_0) compCSet=() ] = 10000 - RooAddition::splusb_1[ set1=(s_1,b_1) set2=() ] = 110 - RooProduct::s_1[ compRSet=(masterSignal,expected_s_1) compCSet=() ] = 10 - RooProduct::bTau_1[ compRSet=(b_1,tau_1) compCSet=() ] = 10000 - - datasets - -------- - RooDataSet::ExpectedNumberCountingData(x_0,y_0,x_1,y_1) - - embedded precalculated expensive components - ------------------------------------------- - */ + ///////////////////////////////////////// + // An example of a number counting combination with two channels. + // We consider both hypothesis testing and the equivalent confidence interval. 
+ ///////////////////////////////////////// + + + ///////////////////////////////////////// + // The Model building stage + ///////////////////////////////////////// + + // Step 1, define arrays with signal & bkg expectations and background uncertainties + Double_t s[2] = {20.,10.}; // expected signal + Double_t b[2] = {100.,100.}; // expected background + Double_t db[2] = {.0100,.0100}; // fractional background uncertainty + + + // Step 2, use a RooStats factory to build a PDF for a + // number counting combination and add it to the workspace. + // We need to give the signal expectation to relate the masterSignal + // to the signal contribution in the individual channels. + // The model neglects correlations in background uncertainty, + // but they could be added without much change to the example. + NumberCountingPdfFactory f; + RooWorkspace* wspace = new RooWorkspace(); + f.AddModel(s,2,wspace,"TopLevelPdf", "masterSignal"); + + // Step 3, use a RooStats factory to add datasets to the workspace. + // Step 3a. + // Add the expected data to the workspace + f.AddExpData(s, b, db, 2, wspace, "ExpectedNumberCountingData"); + + // see below for a printout of the workspace + // wspace->Print(); //uncomment to see structure of workspace + + ///////////////////////////////////////// + // The Hypothesis testing stage: + ///////////////////////////////////////// + // Step 4, Define the null hypothesis for the calculator + // Here you need to know the name of the variables corresponding to hypothesis. + RooRealVar* mu = wspace->var("masterSignal"); + RooArgSet* poi = new RooArgSet(*mu); + RooArgSet* nullParams = new RooArgSet("nullParams"); + nullParams->addClone(*mu); + // here we explicitly set the value of the parameters for the null + nullParams->setRealValue("masterSignal",0); + + // Step 5, Create a calculator for doing the hypothesis test. 
+ // because this is a + ProfileLikelihoodCalculator plc( *wspace->data("ExpectedNumberCountingData"), + *wspace->pdf("TopLevelPdf"), *poi, 0.05, nullParams); + + + // Step 6, Use the Calculator to get a HypoTestResult + HypoTestResult* htr = plc.GetHypoTest(); + assert(htr != 0); + cout << "-------------------------------------------------" << endl; + cout << "The p-value for the null is " << htr->NullPValue() << endl; + cout << "Corresponding to a signifcance of " << htr->Significance() << endl; + cout << "-------------------------------------------------\n\n" << endl; + + /* expected case should return: + ------------------------------------------------- + The p-value for the null is 0.015294 + Corresponding to a signifcance of 2.16239 + ------------------------------------------------- + */ + + ////////////////////////////////////////// + // Confidence Interval Stage + + // Step 8, Here we re-use the ProfileLikelihoodCalculator to return a confidence interval. + // We need to specify what are our parameters of interest + RooArgSet* paramsOfInterest = nullParams; // they are the same as before in this case + plc.SetParameters(*paramsOfInterest); + LikelihoodInterval* lrint = (LikelihoodInterval*) plc.GetInterval(); // that was easy. + lrint->SetConfidenceLevel(0.95); + + // Step 9, make a plot of the likelihood ratio and the interval obtained + //paramsOfInterest->setRealValue("masterSignal",1.); + // find limits + double lower = lrint->LowerLimit(*mu); + double upper = lrint->UpperLimit(*mu); + + LikelihoodIntervalPlot lrPlot(lrint); + lrPlot.SetMaximum(3.); + lrPlot.Draw(); + + // Step 10a. Get upper and lower limits + cout << "lower limit on master signal = " << lower << endl; + cout << "upper limit on master signal = " << upper << endl; + + + // Step 10b, Ask if masterSignal=0 is in the interval. + // Note, this is equivalent to the question of a 2-sigma hypothesis test: + // "is the parameter point masterSignal=0 inside the 95% confidence interval?" 
+ // Since the signficance of the Hypothesis test was > 2-sigma it should not be: + // eg. we exclude masterSignal=0 at 95% confidence. + paramsOfInterest->setRealValue("masterSignal",0.); + cout << "-------------------------------------------------" << endl; + std::cout << "Consider this parameter point:" << std::endl; + paramsOfInterest->first()->Print(); + if( lrint->IsInInterval(*paramsOfInterest) ) + std::cout << "It IS in the interval." << std::endl; + else + std::cout << "It is NOT in the interval." << std::endl; + cout << "-------------------------------------------------\n\n" << endl; + + // Step 10c, We also ask about the parameter point masterSignal=2, which is inside the interval. + paramsOfInterest->setRealValue("masterSignal",2.); + cout << "-------------------------------------------------" << endl; + std::cout << "Consider this parameter point:" << std::endl; + paramsOfInterest->first()->Print(); + if( lrint->IsInInterval(*paramsOfInterest) ) + std::cout << "It IS in the interval." << std::endl; + else + std::cout << "It is NOT in the interval." 
<< std::endl; + cout << "-------------------------------------------------\n\n" << endl; + + + delete lrint; + delete htr; + delete wspace; + delete poi; + delete nullParams; + + + + /* + // Here's an example of what is in the workspace + // wspace->Print(); + RooWorkspace(NumberCountingWS) Number Counting WS contents + + variables + --------- + (x_0,masterSignal,expected_s_0,b_0,y_0,tau_0,x_1,expected_s_1,b_1,y_1,tau_1) + + p.d.f.s + ------- + RooProdPdf::joint[ pdfs=(sigRegion_0,sideband_0,sigRegion_1,sideband_1) ] = 2.20148e-08 + RooPoisson::sigRegion_0[ x=x_0 mean=splusb_0 ] = 0.036393 + RooPoisson::sideband_0[ x=y_0 mean=bTau_0 ] = 0.00398939 + RooPoisson::sigRegion_1[ x=x_1 mean=splusb_1 ] = 0.0380088 + RooPoisson::sideband_1[ x=y_1 mean=bTau_1 ] = 0.00398939 + + functions + -------- + RooAddition::splusb_0[ set1=(s_0,b_0) set2=() ] = 120 + RooProduct::s_0[ compRSet=(masterSignal,expected_s_0) compCSet=() ] = 20 + RooProduct::bTau_0[ compRSet=(b_0,tau_0) compCSet=() ] = 10000 + RooAddition::splusb_1[ set1=(s_1,b_1) set2=() ] = 110 + RooProduct::s_1[ compRSet=(masterSignal,expected_s_1) compCSet=() ] = 10 + RooProduct::bTau_1[ compRSet=(b_1,tau_1) compCSet=() ] = 10000 + + datasets + -------- + RooDataSet::ExpectedNumberCountingData(x_0,y_0,x_1,y_1) + + embedded precalculated expensive components + ------------------------------------------- + */ } @@ -230,95 +229,95 @@ void rs_numberCountingCombination_expected() void rs_numberCountingCombination_observed() { - ///////////////////////////////////////// - // The same example with observed data in a main - // measurement and an background-only auxiliary - // measurement with a factor tau more background - // than in the main measurement. 
- - ///////////////////////////////////////// - // The Model building stage - ///////////////////////////////////////// - - // Step 1, define arrays with signal & bkg expectations and background uncertainties - // We still need the expectation to relate signal in different channels with the master signal - Double_t s[2] = {20.,10.}; // expected signal - - - // Step 2, use a RooStats factory to build a PDF for a - // number counting combination and add it to the workspace. - // We need to give the signal expectation to relate the masterSignal - // to the signal contribution in the individual channels. - // The model neglects correlations in background uncertainty, - // but they could be added without much change to the example. - NumberCountingPdfFactory f; - RooWorkspace* wspace = new RooWorkspace(); - f.AddModel(s,2,wspace,"TopLevelPdf", "masterSignal"); - - // Step 3, use a RooStats factory to add datasets to the workspace. - // Add the observed data to the workspace - Double_t mainMeas[2] = {123.,117.}; // observed main measurement - Double_t bkgMeas[2] = {111.23,98.76}; // observed background - Double_t dbMeas[2] = {.011,.0095}; // observed fractional background uncertainty - f.AddData(mainMeas, bkgMeas, dbMeas, 2, wspace,"ObservedNumberCountingData"); - - // see below for a printout of the workspace - // wspace->Print(); //uncomment to see structure of workspace - - ///////////////////////////////////////// - // The Hypothesis testing stage: - ///////////////////////////////////////// - // Step 4, Define the null hypothesis for the calculator - // Here you need to know the name of the variables corresponding to hypothesis. 
- RooRealVar* mu = wspace->var("masterSignal"); - RooArgSet* poi = new RooArgSet(*mu); - RooArgSet* nullParams = new RooArgSet("nullParams"); - nullParams->addClone(*mu); - // here we explicitly set the value of the parameters for the null - nullParams->setRealValue("masterSignal",0); - - // Step 5, Create a calculator for doing the hypothesis test. - // because this is a - ProfileLikelihoodCalculator plc( *wspace->data("ObservedNumberCountingData"), - *wspace->pdf("TopLevelPdf"), *poi, 0.05, nullParams); - - wspace->var("tau_0")->Print(); - wspace->var("tau_1")->Print(); - - // Step 7, Use the Calculator to get a HypoTestResult - HypoTestResult* htr = plc.GetHypoTest(); - cout << "-------------------------------------------------" << endl; - cout << "The p-value for the null is " << htr->NullPValue() << endl; - cout << "Corresponding to a signifcance of " << htr->Significance() << endl; - cout << "-------------------------------------------------\n\n" << endl; - - /* observed case should return: - ------------------------------------------------- - The p-value for the null is 0.0351669 - Corresponding to a signifcance of 1.80975 - ------------------------------------------------- - */ - - - ////////////////////////////////////////// - // Confidence Interval Stage - - // Step 8, Here we re-use the ProfileLikelihoodCalculator to return a confidence interval. - // We need to specify what are our parameters of interest - RooArgSet* paramsOfInterest = nullParams; // they are the same as before in this case - plc.SetParameters(*paramsOfInterest); - LikelihoodInterval* lrint = (LikelihoodInterval*) plc.GetInterval(); // that was easy. - lrint->SetConfidenceLevel(0.95); - - // Step 9c. 
Get upper and lower limits - cout << "lower limit on master signal = " << lrint->LowerLimit(*mu ) << endl; - cout << "upper limit on master signal = " << lrint->UpperLimit(*mu ) << endl; - - delete lrint; - delete htr; - delete wspace; - delete nullParams; - delete poi; + ///////////////////////////////////////// + // The same example with observed data in a main + // measurement and an background-only auxiliary + // measurement with a factor tau more background + // than in the main measurement. + + ///////////////////////////////////////// + // The Model building stage + ///////////////////////////////////////// + + // Step 1, define arrays with signal & bkg expectations and background uncertainties + // We still need the expectation to relate signal in different channels with the master signal + Double_t s[2] = {20.,10.}; // expected signal + + + // Step 2, use a RooStats factory to build a PDF for a + // number counting combination and add it to the workspace. + // We need to give the signal expectation to relate the masterSignal + // to the signal contribution in the individual channels. + // The model neglects correlations in background uncertainty, + // but they could be added without much change to the example. + NumberCountingPdfFactory f; + RooWorkspace* wspace = new RooWorkspace(); + f.AddModel(s,2,wspace,"TopLevelPdf", "masterSignal"); + + // Step 3, use a RooStats factory to add datasets to the workspace. 
+ // Add the observed data to the workspace + Double_t mainMeas[2] = {123.,117.}; // observed main measurement + Double_t bkgMeas[2] = {111.23,98.76}; // observed background + Double_t dbMeas[2] = {.011,.0095}; // observed fractional background uncertainty + f.AddData(mainMeas, bkgMeas, dbMeas, 2, wspace,"ObservedNumberCountingData"); + + // see below for a printout of the workspace + // wspace->Print(); //uncomment to see structure of workspace + + ///////////////////////////////////////// + // The Hypothesis testing stage: + ///////////////////////////////////////// + // Step 4, Define the null hypothesis for the calculator + // Here you need to know the name of the variables corresponding to hypothesis. + RooRealVar* mu = wspace->var("masterSignal"); + RooArgSet* poi = new RooArgSet(*mu); + RooArgSet* nullParams = new RooArgSet("nullParams"); + nullParams->addClone(*mu); + // here we explicitly set the value of the parameters for the null + nullParams->setRealValue("masterSignal",0); + + // Step 5, Create a calculator for doing the hypothesis test. 
+ // because this is a + ProfileLikelihoodCalculator plc( *wspace->data("ObservedNumberCountingData"), + *wspace->pdf("TopLevelPdf"), *poi, 0.05, nullParams); + + wspace->var("tau_0")->Print(); + wspace->var("tau_1")->Print(); + + // Step 7, Use the Calculator to get a HypoTestResult + HypoTestResult* htr = plc.GetHypoTest(); + cout << "-------------------------------------------------" << endl; + cout << "The p-value for the null is " << htr->NullPValue() << endl; + cout << "Corresponding to a signifcance of " << htr->Significance() << endl; + cout << "-------------------------------------------------\n\n" << endl; + + /* observed case should return: + ------------------------------------------------- + The p-value for the null is 0.0351669 + Corresponding to a signifcance of 1.80975 + ------------------------------------------------- + */ + + + ////////////////////////////////////////// + // Confidence Interval Stage + + // Step 8, Here we re-use the ProfileLikelihoodCalculator to return a confidence interval. + // We need to specify what are our parameters of interest + RooArgSet* paramsOfInterest = nullParams; // they are the same as before in this case + plc.SetParameters(*paramsOfInterest); + LikelihoodInterval* lrint = (LikelihoodInterval*) plc.GetInterval(); // that was easy. + lrint->SetConfidenceLevel(0.95); + + // Step 9c. Get upper and lower limits + cout << "lower limit on master signal = " << lrint->LowerLimit(*mu ) << endl; + cout << "upper limit on master signal = " << lrint->UpperLimit(*mu ) << endl; + + delete lrint; + delete htr; + delete wspace; + delete nullParams; + delete poi; } @@ -327,94 +326,94 @@ void rs_numberCountingCombination_observed() void rs_numberCountingCombination_observedWithTau() { - ///////////////////////////////////////// - // The same example with observed data in a main - // measurement and an background-only auxiliary - // measurement with a factor tau more background - // than in the main measurement. 
- - ///////////////////////////////////////// - // The Model building stage - ///////////////////////////////////////// - - // Step 1, define arrays with signal & bkg expectations and background uncertainties - // We still need the expectation to relate signal in different channels with the master signal - Double_t s[2] = {20.,10.}; // expected signal - - // Step 2, use a RooStats factory to build a PDF for a - // number counting combination and add it to the workspace. - // We need to give the signal expectation to relate the masterSignal - // to the signal contribution in the individual channels. - // The model neglects correlations in background uncertainty, - // but they could be added without much change to the example. - NumberCountingPdfFactory f; - RooWorkspace* wspace = new RooWorkspace(); - f.AddModel(s,2,wspace,"TopLevelPdf", "masterSignal"); - - // Step 3, use a RooStats factory to add datasets to the workspace. - // Add the observed data to the workspace in the on-off problem. - Double_t mainMeas[2] = {123.,117.}; // observed main measurement - Double_t sideband[2] = {11123.,9876.}; // observed sideband - Double_t tau[2] = {100.,100.}; // ratio of bkg in sideband to bkg in main measurement, from experimental design. - f.AddDataWithSideband(mainMeas, sideband, tau, 2, wspace,"ObservedNumberCountingDataWithSideband"); - - // see below for a printout of the workspace - // wspace->Print(); //uncomment to see structure of workspace - - ///////////////////////////////////////// - // The Hypothesis testing stage: - ///////////////////////////////////////// - // Step 4, Define the null hypothesis for the calculator - // Here you need to know the name of the variables corresponding to hypothesis. 
- RooRealVar* mu = wspace->var("masterSignal"); - RooArgSet* poi = new RooArgSet(*mu); - RooArgSet* nullParams = new RooArgSet("nullParams"); - nullParams->addClone(*mu); - // here we explicitly set the value of the parameters for the null - nullParams->setRealValue("masterSignal",0); - - // Step 5, Create a calculator for doing the hypothesis test. - // because this is a - ProfileLikelihoodCalculator plc( *wspace->data("ObservedNumberCountingDataWithSideband"), - *wspace->pdf("TopLevelPdf"), *poi, 0.05, nullParams); - - - // Step 7, Use the Calculator to get a HypoTestResult - HypoTestResult* htr = plc.GetHypoTest(); - cout << "-------------------------------------------------" << endl; - cout << "The p-value for the null is " << htr->NullPValue() << endl; - cout << "Corresponding to a signifcance of " << htr->Significance() << endl; - cout << "-------------------------------------------------\n\n" << endl; - - /* observed case should return: - ------------------------------------------------- - The p-value for the null is 0.0352035 - Corresponding to a signifcance of 1.80928 - ------------------------------------------------- - */ - - - ////////////////////////////////////////// - // Confidence Interval Stage - - // Step 8, Here we re-use the ProfileLikelihoodCalculator to return a confidence interval. - // We need to specify what are our parameters of interest - RooArgSet* paramsOfInterest = nullParams; // they are the same as before in this case - plc.SetParameters(*paramsOfInterest); - LikelihoodInterval* lrint = (LikelihoodInterval*) plc.GetInterval(); // that was easy. - lrint->SetConfidenceLevel(0.95); - - - - // Step 9c. 
Get upper and lower limits - cout << "lower limit on master signal = " << lrint->LowerLimit(*mu ) << endl; - cout << "upper limit on master signal = " << lrint->UpperLimit(*mu ) << endl; - - delete lrint; - delete htr; - delete wspace; - delete nullParams; - delete poi; + ///////////////////////////////////////// + // The same example with observed data in a main + // measurement and an background-only auxiliary + // measurement with a factor tau more background + // than in the main measurement. + + ///////////////////////////////////////// + // The Model building stage + ///////////////////////////////////////// + + // Step 1, define arrays with signal & bkg expectations and background uncertainties + // We still need the expectation to relate signal in different channels with the master signal + Double_t s[2] = {20.,10.}; // expected signal + + // Step 2, use a RooStats factory to build a PDF for a + // number counting combination and add it to the workspace. + // We need to give the signal expectation to relate the masterSignal + // to the signal contribution in the individual channels. + // The model neglects correlations in background uncertainty, + // but they could be added without much change to the example. + NumberCountingPdfFactory f; + RooWorkspace* wspace = new RooWorkspace(); + f.AddModel(s,2,wspace,"TopLevelPdf", "masterSignal"); + + // Step 3, use a RooStats factory to add datasets to the workspace. + // Add the observed data to the workspace in the on-off problem. + Double_t mainMeas[2] = {123.,117.}; // observed main measurement + Double_t sideband[2] = {11123.,9876.}; // observed sideband + Double_t tau[2] = {100.,100.}; // ratio of bkg in sideband to bkg in main measurement, from experimental design. 
+ f.AddDataWithSideband(mainMeas, sideband, tau, 2, wspace,"ObservedNumberCountingDataWithSideband"); + + // see below for a printout of the workspace + // wspace->Print(); //uncomment to see structure of workspace + + ///////////////////////////////////////// + // The Hypothesis testing stage: + ///////////////////////////////////////// + // Step 4, Define the null hypothesis for the calculator + // Here you need to know the name of the variables corresponding to hypothesis. + RooRealVar* mu = wspace->var("masterSignal"); + RooArgSet* poi = new RooArgSet(*mu); + RooArgSet* nullParams = new RooArgSet("nullParams"); + nullParams->addClone(*mu); + // here we explicitly set the value of the parameters for the null + nullParams->setRealValue("masterSignal",0); + + // Step 5, Create a calculator for doing the hypothesis test. + // because this is a + ProfileLikelihoodCalculator plc( *wspace->data("ObservedNumberCountingDataWithSideband"), + *wspace->pdf("TopLevelPdf"), *poi, 0.05, nullParams); + + + // Step 7, Use the Calculator to get a HypoTestResult + HypoTestResult* htr = plc.GetHypoTest(); + cout << "-------------------------------------------------" << endl; + cout << "The p-value for the null is " << htr->NullPValue() << endl; + cout << "Corresponding to a signifcance of " << htr->Significance() << endl; + cout << "-------------------------------------------------\n\n" << endl; + + /* observed case should return: + ------------------------------------------------- + The p-value for the null is 0.0352035 + Corresponding to a signifcance of 1.80928 + ------------------------------------------------- + */ + + + ////////////////////////////////////////// + // Confidence Interval Stage + + // Step 8, Here we re-use the ProfileLikelihoodCalculator to return a confidence interval. 
+ // We need to specify what are our parameters of interest + RooArgSet* paramsOfInterest = nullParams; // they are the same as before in this case + plc.SetParameters(*paramsOfInterest); + LikelihoodInterval* lrint = (LikelihoodInterval*) plc.GetInterval(); // that was easy. + lrint->SetConfidenceLevel(0.95); + + + + // Step 9c. Get upper and lower limits + cout << "lower limit on master signal = " << lrint->LowerLimit(*mu ) << endl; + cout << "upper limit on master signal = " << lrint->UpperLimit(*mu ) << endl; + + delete lrint; + delete htr; + delete wspace; + delete nullParams; + delete poi; } diff --git a/tutorials/roostats/rs_numbercountingutils.C b/tutorials/roostats/rs_numbercountingutils.C index 5131df1286142..32adeddeeb473 100644 --- a/tutorials/roostats/rs_numbercountingutils.C +++ b/tutorials/roostats/rs_numbercountingutils.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_roostats +/// \notebook -nodraw /// 'Number Counting Utils' RooStats tutorial /// /// This tutorial shows an example of the RooStats standalone @@ -35,94 +36,87 @@ /// /// \author Kyle Cranmer -#ifndef __CINT__ -// you need to include this for compiled macro. 
-// But for CINT, it needs to be in this ifndef/endif condition -#include "RooStats/NumberCountingUtils.h" -#include "RooGlobalFunc.h" -#endif #include "RooStats/RooStatsUtils.h" - #include using namespace RooFit; -using namespace RooStats ; // the utilities are in the RooStats namespace +using namespace RooStats; // the utilities are in the RooStats namespace using namespace std ; void rs_numbercountingutils() { - // From the root prompt, you can see the full list of functions by using tab-completion - - // root [0] RooStats::NumberCountingUtils:: - // BinomialExpZ - // BinomialWithTauExpZ - // BinomialObsZ - // BinomialWithTauObsZ - // BinomialExpP - // BinomialWithTauExpP - // BinomialObsP - // BinomialWithTauObsP - - // For each of the utilities you can inspect the arguments by tab completion - - //root [1] NumberCountingUtils::BinomialExpZ( - //Double_t BinomialExpZ(Double_t sExp, Double_t bExp, Double_t fractionalBUncertainty) - - ///////////////////////////////////////////////////// - // Here we see common usages where the experimenter - // has a relative background uncertainty, without - // explicit reference to the auxiliary or sideband - // measurement - - ///////////////////////////////////////////////////// - // Expected p-values and significance with background uncertainty - //////////////////////////////////////////////////// - double sExpected = 50; - double bExpected = 100; - double relativeBkgUncert = 0.1; - - double pExp = NumberCountingUtils::BinomialExpP(sExpected, bExpected, relativeBkgUncert); - double zExp = NumberCountingUtils::BinomialExpZ(sExpected, bExpected, relativeBkgUncert); - cout << "expected p-value ="<< pExp << " Z value (Gaussian sigma) = "<< zExp << endl; - - ///////////////////////////////////////////////////// - // Expected p-values and significance with background uncertainty - //////////////////////////////////////////////////// - double observed = 150; - double pObs = NumberCountingUtils::BinomialObsP(observed, bExpected, 
relativeBkgUncert); - double zObs = NumberCountingUtils::BinomialObsZ(observed, bExpected, relativeBkgUncert); - cout << "observed p-value ="<< pObs << " Z value (Gaussian sigma) = "<< zObs << endl; - - - ///////////////////////////////////////////////////// - // Here we see usages where the experimenter has knowledge - // about the properties of the auxiliary or sideband - // measurement. In particular, the ratio tau of background - // in the auxiliary measurement to the main measurement. - // Large values of tau mean small background uncertainty - // because the sideband is very constraining. - - // Usage: - // root [0] RooStats::NumberCountingUtils::BinomialWithTauExpP( - // Double_t BinomialWithTauExpP(Double_t sExp, Double_t bExp, Double_t tau) - - - ///////////////////////////////////////////////////// - // Expected p-values and significance with background uncertainty - //////////////////////////////////////////////////// - double tau = 1; - - double pExpWithTau = NumberCountingUtils::BinomialWithTauExpP(sExpected, bExpected, tau); - double zExpWithTau = NumberCountingUtils::BinomialWithTauExpZ(sExpected, bExpected, tau); - cout << "expected p-value ="<< pExpWithTau << " Z value (Gaussian sigma) = "<< zExpWithTau << endl; - - ///////////////////////////////////////////////////// - // Expected p-values and significance with background uncertainty - //////////////////////////////////////////////////// - double pObsWithTau = NumberCountingUtils::BinomialWithTauObsP(observed, bExpected, tau); - double zObsWithTau = NumberCountingUtils::BinomialWithTauObsZ(observed, bExpected, tau); - cout << "observed p-value ="<< pObsWithTau << " Z value (Gaussian sigma) = "<< zObsWithTau << endl; + // From the root prompt, you can see the full list of functions by using tab-completion + + // root [0] RooStats::NumberCountingUtils:: + // BinomialExpZ + // BinomialWithTauExpZ + // BinomialObsZ + // BinomialWithTauObsZ + // BinomialExpP + // BinomialWithTauExpP + // BinomialObsP 
+ // BinomialWithTauObsP + + // For each of the utilities you can inspect the arguments by tab completion + + //root [1] NumberCountingUtils::BinomialExpZ( + //Double_t BinomialExpZ(Double_t sExp, Double_t bExp, Double_t fractionalBUncertainty) + + ///////////////////////////////////////////////////// + // Here we see common usages where the experimenter + // has a relative background uncertainty, without + // explicit reference to the auxiliary or sideband + // measurement + + ///////////////////////////////////////////////////// + // Expected p-values and significance with background uncertainty + //////////////////////////////////////////////////// + double sExpected = 50; + double bExpected = 100; + double relativeBkgUncert = 0.1; + + double pExp = NumberCountingUtils::BinomialExpP(sExpected, bExpected, relativeBkgUncert); + double zExp = NumberCountingUtils::BinomialExpZ(sExpected, bExpected, relativeBkgUncert); + cout << "expected p-value ="<< pExp << " Z value (Gaussian sigma) = "<< zExp << endl; + + ///////////////////////////////////////////////////// + // Expected p-values and significance with background uncertainty + //////////////////////////////////////////////////// + double observed = 150; + double pObs = NumberCountingUtils::BinomialObsP(observed, bExpected, relativeBkgUncert); + double zObs = NumberCountingUtils::BinomialObsZ(observed, bExpected, relativeBkgUncert); + cout << "observed p-value ="<< pObs << " Z value (Gaussian sigma) = "<< zObs << endl; + + + ///////////////////////////////////////////////////// + // Here we see usages where the experimenter has knowledge + // about the properties of the auxiliary or sideband + // measurement. In particular, the ratio tau of background + // in the auxiliary measurement to the main measurement. + // Large values of tau mean small background uncertainty + // because the sideband is very constraining. 
+ + // Usage: + // root [0] RooStats::NumberCountingUtils::BinomialWithTauExpP( + // Double_t BinomialWithTauExpP(Double_t sExp, Double_t bExp, Double_t tau) + + + ///////////////////////////////////////////////////// + // Expected p-values and significance with background uncertainty + //////////////////////////////////////////////////// + double tau = 1; + + double pExpWithTau = NumberCountingUtils::BinomialWithTauExpP(sExpected, bExpected, tau); + double zExpWithTau = NumberCountingUtils::BinomialWithTauExpZ(sExpected, bExpected, tau); + cout << "expected p-value ="<< pExpWithTau << " Z value (Gaussian sigma) = "<< zExpWithTau << endl; + + ///////////////////////////////////////////////////// + // Expected p-values and significance with background uncertainty + //////////////////////////////////////////////////// + double pObsWithTau = NumberCountingUtils::BinomialWithTauObsP(observed, bExpected, tau); + double zObsWithTau = NumberCountingUtils::BinomialWithTauObsZ(observed, bExpected, tau); + cout << "observed p-value ="<< pObsWithTau << " Z value (Gaussian sigma) = "<< zObsWithTau << endl; } diff --git a/tutorials/spectrum/peaks.C b/tutorials/spectrum/peaks.C index f74d473b47d44..abe2ea3657d97 100644 --- a/tutorials/spectrum/peaks.C +++ b/tutorials/spectrum/peaks.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_spectrum +/// \notebook /// Getting Contours From TH2D. /// Illustrates how to find peaks in histograms. /// diff --git a/tutorials/spectrum/peaks2.C b/tutorials/spectrum/peaks2.C index d05823540620c..617b763a4b8e9 100644 --- a/tutorials/spectrum/peaks2.C +++ b/tutorials/spectrum/peaks2.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_spectrum +/// \notebook /// Example to illustrate the 2-d peak finder (class TSpectrum2). 
/// /// This script generates a random number of 2-d gaussian peaks diff --git a/tutorials/spectrum/spectrumpainter.C b/tutorials/spectrum/spectrumpainter.C index 008429e090509..aecf21586b4ff 100644 --- a/tutorials/spectrum/spectrumpainter.C +++ b/tutorials/spectrum/spectrumpainter.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_spectrum +/// \notebook /// Examples showing how to use TSpectrum2Painter (the SPEC option) /// /// \macro_image @@ -7,6 +8,7 @@ /// /// \author: Olivier Couet, Miroslav Morhac +void spectrumpainter() { TH2 *h2 = new TH2F("h2","h2",40,-8,8,40,-9,9); Float_t px, py; diff --git a/tutorials/xml/DOMParsePerson.C b/tutorials/xml/DOMParsePerson.C index 8beb1589a0cf7..25cf8d23666e1 100644 --- a/tutorials/xml/DOMParsePerson.C +++ b/tutorials/xml/DOMParsePerson.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_xml +/// \notebook -nodraw /// ROOT implementation of a XML DOM Parser /// /// This is an example of how Dom Parser works. It will parse the xml file @@ -233,7 +234,7 @@ void DOMParsePerson() { PersonList personlist; gROOT->ProcessLine(".O 0"); - TString dir = gSystem->DirName(__FILE__); - if (personlist.ParseFile(dir+"/person.xml") == 0) + TString dir = gROOT->GetTutorialsDir(); + if (personlist.ParseFile(dir+"/xml/person.xml") == 0) cout << personlist << endl; } diff --git a/tutorials/xml/DOMRecursive.C b/tutorials/xml/DOMRecursive.C index 6f6cfbc3744e8..9d4f95e535fd7 100644 --- a/tutorials/xml/DOMRecursive.C +++ b/tutorials/xml/DOMRecursive.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_xml +/// \notebook -nodraw /// ROOT implementation of a XML DOM Parser /// /// This is an example of how Dom Parser walks the DOM tree recursively. 
@@ -53,9 +54,9 @@ void ParseContext(TXMLNode *node) void DOMRecursive() { TDOMParser *domParser = new TDOMParser(); - TString dir = gSystem->DirName(__FILE__); + TString dir = gROOT->GetTutorialsDir(); domParser->SetValidate(false); // do not validate with DTD - domParser->ParseFile(dir+"/person.xml"); + domParser->ParseFile(dir+"/xml/person.xml"); TXMLNode *node = domParser->GetXMLDocument()->GetRootNode(); diff --git a/tutorials/xml/SAXHandler.C b/tutorials/xml/SAXHandler.C index 5deee693a58e2..d1e12edb119d7 100644 --- a/tutorials/xml/SAXHandler.C +++ b/tutorials/xml/SAXHandler.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_xml +/// \notebook -nodraw /// ROOT implementation of a simple SAX Handler. /// /// This handler uses TSAXParser, a SAX Parser using the SAX interface @@ -103,6 +104,6 @@ void SAXHandler() SaxHandler *saxHandler = new SaxHandler(); saxParser->ConnectToHandler("SaxHandler", saxHandler); - TString dir = gSystem->DirName(__FILE__); - saxParser->ParseFile(dir+"/saxexample.xml"); + TString dir = gROOT->GetTutorialsDir(); + saxParser->ParseFile(dir+"/xml/saxexample.xml"); } diff --git a/tutorials/xml/xmlnewfile.C b/tutorials/xml/xmlnewfile.C index 2ae71a82542e3..66adca5eecd5a 100644 --- a/tutorials/xml/xmlnewfile.C +++ b/tutorials/xml/xmlnewfile.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_xml +/// \notebook -nodraw /// Example to create a new xml file with the TXMLEngine class /// /// \macro_code diff --git a/tutorials/xml/xmlreadfile.C b/tutorials/xml/xmlreadfile.C index ae19b07a8cb26..b0a5b39e5932f 100644 --- a/tutorials/xml/xmlreadfile.C +++ b/tutorials/xml/xmlreadfile.C @@ -1,5 +1,6 @@ /// \file /// \ingroup tutorial_xml +/// \notebook -nodraw /// Example to read and parse any xml file, supported by TXMLEngine class /// The input file, produced by xmlnewfile.C macro is used /// If you need full xml syntax support, use TXMLParser instead