diff --git a/doc/distributions/dist_reference.qbk b/doc/distributions/dist_reference.qbk index 63fe12bf32..c225d1953e 100644 --- a/doc/distributions/dist_reference.qbk +++ b/doc/distributions/dist_reference.qbk @@ -21,6 +21,7 @@ [include inverse_chi_squared.qbk] [include inverse_gamma.qbk] [include inverse_gaussian.qbk] +[include kolmogorov_smirnov.qbk] [include laplace.qbk] [include logistic.qbk] [include lognormal.qbk] diff --git a/doc/distributions/kolmogorov_smirnov.qbk b/doc/distributions/kolmogorov_smirnov.qbk new file mode 100644 index 0000000000..c557f6fc41 --- /dev/null +++ b/doc/distributions/kolmogorov_smirnov.qbk @@ -0,0 +1,122 @@ +[section:kolmogorov_smirnov_dist Kolmogorov-Smirnov Distribution] + +``#include `` + + namespace boost{ namespace math{ + + template + class kolmogorov_smirnov_distribution; + + typedef kolmogorov_smirnov_distribution<> kolmogorov_smirnov; + + template + class kolmogorov_smirnov_distribution + { + public: + typedef RealType value_type; + typedef Policy policy_type; + + // Constructor: + kolmogorov_smirnov_distribution(RealType n); + + // Accessor to parameter: + RealType number_of_observations()const; + + }} // namespaces + +The Kolmogorov-Smirnov test in statistics compares two empirical distributions, +or an empirical distribution against any theoretical distribution.[footnote +[@https://en.wikipedia.org/wiki/Kolmogorov–Smirnov_test Wikipedia: +Kolmogorov-Smirnov test]] It makes use of a specific distribution which is +informally known in the literature as the Kolmogorv-Smirnov distribution, +implemented here. + +Formally, if /n/ observations are taken from a theoretical distribution /G(x)/, +and if /G/[sub /n/](/x/) represents the empirical CDF of those /n/ observations, +then the test statistic + +[equation kolmogorov_smirnov_test_statistic] [/ D_n = \sup_x|G(x)-\hat{G}(x)| ] + +will be distributed according to a Kolmogorov-Smirnov distribution parameterized by /n/. + +The exact form of a Kolmogorov-Smirnov distribution is the subject of a large, +decades-old literature.[footnote +[@https://www.jstatsoft.org/article/view/v039i11 Simard, R. and L'Ecuyer, P. +(2011) "Computing the Two-Sided Kolmogorov-Smirnov Distribution". Journal of +Statistical Software, vol. 39, no. 11.]] In the interest of simplicity, Boost +implements the first-order, limiting form of this distribution (the same form +originally identified by Kolmogorov[footnote Kolmogorov A (1933). "Sulla +determinazione empirica di una legge di distribuzione". G. Ist. Ital. Attuari. +4: 83–91.]), namely + +[equation kolmogorov_smirnov_distribution] +[/ \lim_{n \to \infty} F_n(x/\sqrt{n})=1+2\sum_{k=1}^\infty (-1)^k e^{-2k^2x^2} ] + +Note that while the exact distribution only has support over \[0, 1\], this +limiting form has positive mass above unity, particularly for small /n/. The +following graph illustrations how the distribution changes for different values +of /n/: + +[graph kolmogorov_smirnov_pdf] + +[h4 Member Functions] + + kolmogorov_smirnov_distribution(RealType n); + +Constructs a Kolmogorov-Smirnov distribution with /n/ observations. + +Requires n > 0, otherwise calls __domain_error. + + RealType number_of_observations()const; + +Returns the parameter /n/ from which this object was constructed. + +[h4 Non-member Accessors] + +All the [link math_toolkit.dist_ref.nmp usual non-member accessor functions] +that are generic to all distributions are supported: __usual_accessors. + +The domain of the random variable is \[0, +[infin]\]. + +[h4 Accuracy] + +The CDF of the Kolmogorov-Smirnov distribution is implemented in terms of the +fourth [link math_toolkit.jacobi_theta.jacobi_theta_overview Jacobi Theta +function]; please refer to the accuracy ULP plots for that function. + +The PDF is implemented separately, and the following ULP plot illustrates its +accuracy: + +[graph kolmogorov_smirnov_pdf_ulp] + +Because PDF values are simply scaled out and up by the square root of /n/, the +above plot is representative for all values of /n/. Note that for present +purposes, "accuracy" refers to deviations from the limiting approximation, +rather than deviations from the exact distribution. + +[h4 Implementation] + +In the following table, /n/ is the number of observations, /x/ is the random variable, +[pi] is Archimedes' constant, and [zeta](3) is Apéry's constant. + +[table +[[Function][Implementation Notes]] +[[cdf][Using the relation: cdf = __jacobi_theta4tau(0, 2*x*x/[pi])]] +[[pdf][Using a manual derivative of the CDF]] +[[cdf complement][ +When x*x*n == 0: 1 + +When 2*x*x*n <= [pi]: 1 - __jacobi_theta4tau(0, 2*x*x*n/[pi]) + +When 2*x*x*n > [pi]: -__jacobi_theta4m1tau(0, 2*x*x*n/[pi])]] +[[quantile][Using a Newton-Raphson iteration]] +[[quantile from the complement][Using a Newton-Raphson iteration]] +[[mode][Using a run-time PDF maximizer]] +[[mean][sqrt([pi]/2) * ln(2) / sqrt(n)]] +[[variance][([pi][super 2]/12 - [pi]/2*ln[super 2](2))/n]] +[[skewness][(9/16*sqrt([pi]/2)*[zeta](3)/n[super 3/2] - 3 * mean * variance - mean[super 2] * variance) / (variance[super 3/2])]] +[[kurtosis][(7/720*[pi][super 4]/n[super 2] - 4 * mean * skewness * variance[super 3/2] - 6 * mean[super 2] * variance - mean[super 4]) / (variance[super 2])]] +] + +[endsect] [/section:kolmogorov_smirnov_dist Kolmogorov-Smirnov] diff --git a/doc/equations/kolmogorov_smirnov_distribution.svg b/doc/equations/kolmogorov_smirnov_distribution.svg new file mode 100644 index 0000000000..4233fe70d1 --- /dev/null +++ b/doc/equations/kolmogorov_smirnov_distribution.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/doc/equations/kolmogorov_smirnov_test_statistic.svg b/doc/equations/kolmogorov_smirnov_test_statistic.svg new file mode 100644 index 0000000000..8c72cefa59 --- /dev/null +++ b/doc/equations/kolmogorov_smirnov_test_statistic.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/doc/graphs/dist_graphs.cpp b/doc/graphs/dist_graphs.cpp index bcf0bb23af..611c357cae 100644 --- a/doc/graphs/dist_graphs.cpp +++ b/doc/graphs/dist_graphs.cpp @@ -84,8 +84,9 @@ class distribution_plotter m_distributions.push_back(std::make_pair(name, d)); // // Get the extent of the distribution from the support: - double a, b; - std::tr1::tie(a, b) = support(d); + std::pair r = support(d); + double a = r.first; + double b = r.second; // // PDF maximum is at the mode (probably): double mod; @@ -253,7 +254,7 @@ class distribution_plotter if(!is_discrete_distribution::value) { // Continuous distribution: - for(std::list >::const_iterator i = m_distributions.begin(); + for(typename std::list >::const_iterator i = m_distributions.begin(); i != m_distributions.end(); ++i) { double x = m_min_x; @@ -280,7 +281,7 @@ class distribution_plotter // Discrete distribution: double x_width = 0.75 / m_distributions.size(); double x_off = -0.5 * 0.75; - for(std::list >::const_iterator i = m_distributions.begin(); + for(typename std::list >::const_iterator i = m_distributions.begin(); i != m_distributions.end(); ++i) { double x = ceil(m_min_x); @@ -469,6 +470,22 @@ int main() fisher_f_plotter.add(boost::math::fisher_f_distribution<>(4, 10), "n=4, m=10"); fisher_f_plotter.plot("F Distribution PDF", "fisher_f_pdf.svg"); + distribution_plotter > + kolmogorov_smirnov_cdf_plotter(false); + kolmogorov_smirnov_cdf_plotter.add(boost::math::kolmogorov_smirnov_distribution<>(1), "n=1"); + kolmogorov_smirnov_cdf_plotter.add(boost::math::kolmogorov_smirnov_distribution<>(2), "n=2"); + kolmogorov_smirnov_cdf_plotter.add(boost::math::kolmogorov_smirnov_distribution<>(5), "n=5"); + kolmogorov_smirnov_cdf_plotter.add(boost::math::kolmogorov_smirnov_distribution<>(10), "n=10"); + kolmogorov_smirnov_cdf_plotter.plot("Kolmogorov-Smirnov Distribution CDF", "kolmogorov_smirnov_cdf.svg"); + + distribution_plotter > + kolmogorov_smirnov_pdf_plotter; + kolmogorov_smirnov_pdf_plotter.add(boost::math::kolmogorov_smirnov_distribution<>(1), "n=1"); + kolmogorov_smirnov_pdf_plotter.add(boost::math::kolmogorov_smirnov_distribution<>(2), "n=2"); + kolmogorov_smirnov_pdf_plotter.add(boost::math::kolmogorov_smirnov_distribution<>(5), "n=5"); + kolmogorov_smirnov_pdf_plotter.add(boost::math::kolmogorov_smirnov_distribution<>(10), "n=10"); + kolmogorov_smirnov_pdf_plotter.plot("Kolmogorov-Smirnov Distribution PDF", "kolmogorov_smirnov_pdf.svg"); + distribution_plotter > lognormal_plotter; lognormal_plotter.add(boost::math::lognormal_distribution<>(-1), "location=-1"); diff --git a/doc/graphs/kolmogorov_smirnov_pdf.svg b/doc/graphs/kolmogorov_smirnov_pdf.svg new file mode 100644 index 0000000000..46dcb98aa7 --- /dev/null +++ b/doc/graphs/kolmogorov_smirnov_pdf.svg @@ -0,0 +1,70 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +0.5 +1 +1.5 +0 + +0 +1 +2 +3 +4 +5 +0 + +Probability + +Random Variable + + + + + + + + + +n=1 +n=2 +n=5 +n=10 + +Kolmogorov-Smirnov Distribution PDF + + + diff --git a/doc/graphs/kolmogorov_smirnov_pdf_ulp.svg b/doc/graphs/kolmogorov_smirnov_pdf_ulp.svg new file mode 100644 index 0000000000..f1adef5dc2 --- /dev/null +++ b/doc/graphs/kolmogorov_smirnov_pdf_ulp.svg @@ -0,0 +1,2551 @@ + + + +Kolmogorov-Smirnov PDF (N=10) ULP plot at float precision + + + + +-75 + +-50 + +-25 + +0 + +25 + +50 + +75 + +100 + +0.1 + +0.2 + +0.3 + +0.4 + +0.5 + +0.6 + +0.7 + +0.8 + +0.9 + +1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/html/dist.html b/doc/html/dist.html index 6dfd1bd00c..4d84eb9867 100644 --- a/doc/html/dist.html +++ b/doc/html/dist.html @@ -169,6 +169,8 @@ Gamma Distribution
Inverse Gaussian (or Inverse Normal) Distribution
+
Kolmogorov-Smirnov + Distribution
Laplace Distribution
Logistic Distribution
diff --git a/doc/html/math_toolkit/dist_ref/dists/kolmogorov_smirnov_dist.html b/doc/html/math_toolkit/dist_ref/dists/kolmogorov_smirnov_dist.html new file mode 100644 index 0000000000..ff44b4c370 --- /dev/null +++ b/doc/html/math_toolkit/dist_ref/dists/kolmogorov_smirnov_dist.html @@ -0,0 +1,362 @@ + + + +Kolmogorov-Smirnov Distribution + + + + + + + + + + + + + + + +
Boost C++ LibrariesHomeLibrariesPeopleFAQMore
+
+
+PrevUpHomeNext +
+
+ + +
#include <boost/math/distributions/kolmogorov_smirnov.hpp>
+
namespace boost{ namespace math{
+
+template <class RealType = double,
+          class Policy   = policies::policy<> >
+class kolmogorov_smirnov_distribution;
+
+typedef kolmogorov_smirnov_distribution<> kolmogorov_smirnov;
+
+template <class RealType, class Policy>
+class kolmogorov_smirnov_distribution
+{
+public:
+   typedef RealType  value_type;
+   typedef Policy    policy_type;
+
+   // Constructor:
+   kolmogorov_smirnov_distribution(RealType n);
+
+   // Accessor to parameter:
+   RealType number_of_observations()const;
+
+}} // namespaces
+
+

+ The Kolmogorov-Smirnov test in statistics compares two empirical distributions, + or an empirical distribution against any theoretical distribution.[1] It makes use of a specific distribution which is informally + known in the literature as the Kolmogorv-Smirnov distribution, implemented + here. +

+

+ Formally, if n observations are taken from a theoretical + distribution G(x), and if Gn(x) + represents the empirical CDF of those n observations, + then the test statistic +

+
+

+ + +

+
+

+ will be distributed according to a Kolmogorov-Smirnov distribution parameterized + by n. +

+

+ The exact form of a Kolmogorov-Smirnov distribution is the subject of a + large, decades-old literature.[2] In the interest of simplicity, Boost implements the first-order, + limiting form of this distribution (the same form originally identified + by Kolmogorov[3]), namely +

+
+

+ + +

+
+

+ Note that while the exact distribution only has support over [0, 1], this + limiting form has positive mass above unity, particlularly for small n. + The following graph illustrations how the distribution changes for different + values of n: +

+
+

+ + +

+
+
+ + Member + Functions +
+
kolmogorov_smirnov_distribution(RealType n);
+
+

+ Constructs a Kolmogorov-Smirnov distribution with n + observations. +

+

+ Requires n > 0, otherwise calls domain_error. +

+
RealType number_of_observations()const;
+
+

+ Returns the parameter n from which this object was + constructed. +

+
+ + Non-member + Accessors +
+

+ All the usual non-member accessor + functions that are generic to all distributions are supported: + Cumulative Distribution Function, + Probability Density Function, + Quantile, Hazard Function, Cumulative Hazard Function, + mean, median, + mode, variance, + standard deviation, + skewness, kurtosis, kurtosis_excess, + range and support. +

+

+ The domain of the random variable is [0, +∞]. +

+
+ + Accuracy +
+

+ The CDF of the Kolmogorov-Smirnov distribution is implemented in terms + of the fourth Jacobi + Theta function; please refer to the accuracy ULP plots for that + function. +

+

+ The PDF is implemented separately, and the following ULP plot illustrates + its accuracy: +

+
+

+ + +

+
+

+ Because PDF values are simply scaled out and up by the square root of + n, the above plot is representative for all values + of n. Note that for present purposes, "accuracy" + refers to deviations from the limiting approximation, rather than deviations + from the exact distribution. +

+
+ + Implementation +
+

+ In the following table, n is the number of observations, + x is the random variable, π is Archimedes' constant, + and ζ(3) is Apéry's constant. +

+
+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Function +

+
+

+ Implementation Notes +

+
+

+ cdf +

+
+

+ Using the relation: cdf = jacobi_theta4tau(0, + 2*x*x/π) +

+
+

+ pdf +

+
+

+ Using a manual derivative of the CDF +

+
+

+ cdf complement +

+
+

+ When x*x*n == 0: 1 +

+

+ When 2*x*x*n <= π: 1 - jacobi_theta4tau(0, + 2*x*x*n/π) +

+

+ When 2*x*x*n > π: -jacobi_theta4m1tau(0, + 2*x*x*n/π) +

+
+

+ quantile +

+
+

+ Using a Newton-Raphson iteration +

+
+

+ quantile from the complement +

+
+

+ Using a Newton-Raphson iteration +

+
+

+ mode +

+
+

+ Using a run-time PDF maximizer +

+
+

+ mean +

+
+

+ sqrt(π/2) * ln(2) / sqrt(n) +

+
+

+ variance +

+
+

+ (π2/12 - π/2*ln2(2))/n +

+
+

+ skewness +

+
+

+ (9/16*sqrt(π/2)*ζ(3)/n3/2 - 3 * mean * variance - mean2 * variance) + / (variance3/2) +

+
+

+ kurtosis +

+
+

+ (7/720*π4/n2 - 4 * mean * skewness * variance3/2 - 6 * mean2 * variance + - mean4) / (variance2) +

+
+
+
+

+ + +
+

[3] + Kolmogorov A (1933). "Sulla determinazione empirica di una legge + di distribuzione". G. Ist. Ital. Attuari. 4: 83–91. +

+
+
+
+ + + +
+
+
+PrevUpHomeNext +
+ + diff --git a/doc/html/standalone_HTML.manifest b/doc/html/standalone_HTML.manifest index 003b88ea9b..614cce4973 100644 --- a/doc/html/standalone_HTML.manifest +++ b/doc/html/standalone_HTML.manifest @@ -130,6 +130,7 @@ math_toolkit/dist_ref/dists/hypergeometric_dist.html math_toolkit/dist_ref/dists/inverse_chi_squared_dist.html math_toolkit/dist_ref/dists/inverse_gamma_dist.html math_toolkit/dist_ref/dists/inverse_gaussian_dist.html +math_toolkit/dist_ref/dists/kolmogorov_smirnov_dist.html math_toolkit/dist_ref/dists/laplace_dist.html math_toolkit/dist_ref/dists/logistic_dist.html math_toolkit/dist_ref/dists/lognormal_dist.html diff --git a/doc/math.qbk b/doc/math.qbk index 984140d840..847b33e6eb 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -387,6 +387,7 @@ and use the function's name as the link text.] [def __inverse_gamma_distrib [link math_toolkit.dist_ref.dists.inverse_gamma_dist Inverse Gamma Distribution]] [def __inverse_gaussian_distrib [link math_toolkit.dist_ref.dists.inverse_gaussian_dist Inverse Gaussian Distribution]] [def __inverse_chi_squared_distrib [link math_toolkit.dist_ref.dists.inverse_chi_squared_dist Inverse chi squared Distribution]] +[def __kolmogorov_smirnov_distrib [link math_toolkit.dist_ref.dists.kolmogorov_smirnov Kolmogorov-Smirnov Distribution]] [def __laplace_distrib [link math_toolkit.dist_ref.dists.laplace_dist Laplace Distribution]] [def __logistic_distrib [link math_toolkit.dist_ref.dists.logistic_dist Logistic Distribution]] [def __lognormal_distrib [link math_toolkit.dist_ref.dists.lognormal_dist Log-normal Distribution]] diff --git a/include/boost/math/distributions.hpp b/include/boost/math/distributions.hpp index 300f2312ce..64da99415e 100644 --- a/include/boost/math/distributions.hpp +++ b/include/boost/math/distributions.hpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/include/boost/math/distributions/fwd.hpp b/include/boost/math/distributions/fwd.hpp index 5b212c8ec6..a3c1a41df5 100644 --- a/include/boost/math/distributions/fwd.hpp +++ b/include/boost/math/distributions/fwd.hpp @@ -63,6 +63,9 @@ class inverse_gamma_distribution; template class inverse_gaussian_distribution; +template +class kolmogorov_smirnov_distribution; + template class laplace_distribution; @@ -129,6 +132,7 @@ class weibull_distribution; typedef boost::math::gamma_distribution gamma;\ typedef boost::math::geometric_distribution geometric;\ typedef boost::math::hypergeometric_distribution hypergeometric;\ + typedef boost::math::kolmogorov_smirnov_distribution kolmogorov_smirnov;\ typedef boost::math::inverse_chi_squared_distribution inverse_chi_squared;\ typedef boost::math::inverse_gaussian_distribution inverse_gaussian;\ typedef boost::math::inverse_gamma_distribution inverse_gamma;\ diff --git a/include/boost/math/distributions/kolmogorov_smirnov.hpp b/include/boost/math/distributions/kolmogorov_smirnov.hpp new file mode 100644 index 0000000000..1dc0a92ccf --- /dev/null +++ b/include/boost/math/distributions/kolmogorov_smirnov.hpp @@ -0,0 +1,494 @@ +// Kolmogorov-Smirnov 1st order asymptotic distribution +// Copyright Evan Miller 2020 +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// The Kolmogorov-Smirnov test in statistics compares two empirical distributions, +// or an empirical distribution against any theoretical distribution. It makes +// use of a specific distribution which doesn't have a formal name, but which +// is often called the Kolmogorv-Smirnov distribution for lack of anything +// better. This file implements the limiting form of this distribution, first +// identified by Andrey Kolmogorov in +// +// Kolmogorov, A. (1933) “Sulla Determinazione Empirica di una Legge di +// Distribuzione.” Giornale dell’ Istituto Italiano degli Attuari +// +// This limiting form of the CDF is a first-order Taylor expansion that is +// easily implemented by the fourth Jacobi Theta function (setting z=0). The +// PDF is then implemented here as a derivative of the Theta function. Note +// that this derivative is with respect to x, which enters into \tau, and not +// with respect to the z argument, which is always zero, and so the derivative +// identities in DLMF 20.4 do not apply here. +// +// A higher order order expansion is possible, and was first outlined by +// +// Pelz W, Good IJ (1976). “Approximating the Lower Tail-Areas of the +// Kolmogorov-Smirnov One-sample Statistic.” Journal of the Royal Statistical +// Society B. +// +// The terms in this expansion get fairly complicated, and as far as I know the +// Pelz-Good expansion is not used in any statistics software. Someone could +// consider updating this implementation to use the Pelz-Good expansion in the +// future, but the math gets considerably hairier with each additional term. +// +// A formula for an exact version of the Kolmogorov-Smirnov test is laid out in +// Equation 2.4.4 of +// +// Durbin J (1973). “Distribution Theory for Tests Based on the Sample +// Distribution Func- tion.” In SIAM CBMS-NSF Regional Conference Series in +// Applied Mathematics. SIAM, Philadelphia, PA. +// +// which is available in book form from Amazon and others. This exact version +// involves taking powers of large matrices. To do that right you need to +// compute eigenvalues and eigenvectors, which are beyond the scope of Boost. +// (Some recent work indicates the exact form can also be computed via FFT, see +// https://cran.r-project.org/web/packages/KSgeneral/KSgeneral.pdf). +// +// Even if the CDF of the exact distribution could be computed using Boost +// libraries (which would be cumbersome), the PDF would present another +// difficulty. Therefore I am limiting this implementation to the asymptotic +// form, even though the exact form has trivial values for certain specific +// values of x and n. For more on trivial values see +// +// Ruben H, Gambino J (1982). “The Exact Distribution of Kolmogorov’s Statistic +// Dn for n ≤ 10.” Annals of the Institute of Statistical Mathematics. +// +// For a good bibliography and overview of the various algorithms, including +// both exact and asymptotic forms, see +// https://www.jstatsoft.org/article/view/v039i11 +// +// As for this implementation: the distribution is parameterized by n (number +// of observations) in the spirit of chi-squared's degrees of freedom. It then +// takes a single argument x. In terms of the Kolmogorov-Smirnov statistical +// test, x represents the distribution of D_n, where D_n is the maximum +// difference between the CDFs being compared, that is, +// +// D_n = sup|F_n(x) - G(x)| +// +// In the exact distribution, x is confined to the support [0, 1], but in this +// limiting approximation, we allow x to exceed unity (similar to how a normal +// approximation always spills over any boundaries). +// +// As mentioned previously, the CDF is implemented using the \tau +// parameterization of the fourth Jacobi Theta function as +// +// CDF=θ₄(0|2*x*x*n/pi) +// +// The PDF is a hand-coded derivative of that function. Actually, there are two +// (independent) derivatives, as separate code paths are used for "small x" +// (2*x*x*n < pi) and "large x", mirroring the separate code paths in the +// Jacobi Theta implementation to achieve fast convergence. Quantiles are +// computed using a Newton-Raphson iteration from an initial guess that I +// arrived at by trial and error. +// +// The mean and variance are implemented using simple closed-form expressions. +// Skewness and kurtosis use slightly more complicated closed-form expressions +// that involve the zeta function. The mode is calculated at run-time by +// maximizing the PDF. If you have an analytical solution for the mode, feel +// free to plop it in. +// +// The CDF and PDF could almost certainly be re-implemented and sped up using a +// polynomial or rational approximation, since the only meaningful argument is +// x * sqrt(n). But that is left as an exercise for the next maintainer. +// +// In the future, the Pelz-Good approximation could be added. I suggest adding +// a second parameter representing the order, e.g. +// +// kolmogorov_smirnov_dist<>(100) // N=100, order=1 +// kolmogorov_smirnov_dist<>(100, 1) // N=100, order=1, i.e. Kolmogorov's formula +// kolmogorov_smirnov_dist<>(100, 4) // N=100, order=4, i.e. Pelz-Good formula +// +// The exact distribution could be added to the API with a special order +// parameter (e.g. 0 or infinity), or a separate distribution type altogether +// (e.g. kolmogorov_smirnov_exact_distribution). +// +#ifndef BOOST_MATH_DISTRIBUTIONS_KOLMOGOROV_SMIRNOV_HPP +#define BOOST_MATH_DISTRIBUTIONS_KOLMOGOROV_SMIRNOV_HPP + +#include +#include +#include +#include +#include +#include // Newton-Raphson +#include // For the mode + +namespace boost { namespace math { + +namespace detail { +template +inline RealType kolmogorov_smirnov_quantile_guess(RealType p) { + // Choose a starting point for the Newton-Raphson iteration + if (p > 0.9) + return RealType(1.8) - 5 * (1 - p); + if (p < 0.3) + return p + RealType(0.45); + return p + RealType(0.3); +} + +// d/dk (theta2(0, 1/(2*k*k/M_PI))/sqrt(2*k*k*M_PI)) +template +RealType kolmogorov_smirnov_pdf_small_x(RealType x, RealType n, const Policy&) { + BOOST_MATH_STD_USING + RealType value = RealType(0), delta = RealType(0), last_delta = RealType(0); + RealType eps = policies::get_epsilon(); + int i = 0; + RealType pi2 = constants::pi_sqr(); + RealType x2n = x*x*n; + if (x2n*x2n == 0.0) { + return static_cast(0); + } + while (1) { + delta = exp(-RealType(i+0.5)*RealType(i+0.5)*pi2/(2*x2n)) * (RealType(i+0.5)*RealType(i+0.5)*pi2 - x2n); + + if (delta == 0.0) + break; + + if (last_delta != 0.0 && fabs(delta/last_delta) < eps) + break; + + value += delta + delta; + last_delta = delta; + i++; + } + + return value * sqrt(n) * constants::root_half_pi() / (x2n*x2n); +} + +// d/dx (theta4(0, 2*x*x*n/M_PI)) +template +inline RealType kolmogorov_smirnov_pdf_large_x(RealType x, RealType n, const Policy&) { + BOOST_MATH_STD_USING + RealType value = RealType(0), delta = RealType(0), last_delta = RealType(0); + RealType eps = policies::get_epsilon(); + int i = 1; + while (1) { + delta = 8*x*i*i*exp(-2*i*i*x*x*n); + + if (delta == 0.0) + break; + + if (last_delta != 0.0 && fabs(delta / last_delta) < eps) + break; + + if (i%2 == 0) + delta = -delta; + + value += delta; + last_delta = delta; + i++; + } + + return value * n; +} + +}; // detail + +template > + class kolmogorov_smirnov_distribution +{ + public: + typedef RealType value_type; + typedef Policy policy_type; + + // Constructor + kolmogorov_smirnov_distribution( RealType n ) : n_obs_(n) + { + RealType result; + detail::check_df( + "boost::math::kolmogorov_smirnov_distribution<%1%>::kolmogorov_smirnov_distribution", n_obs_, &result, Policy()); + } + + RealType number_of_observations()const + { + return n_obs_; + } + + private: + + RealType n_obs_; // positive integer +}; + +typedef kolmogorov_smirnov_distribution kolmogorov_k; // Convenience typedef for double version. + +namespace detail { +template +struct kolmogorov_smirnov_quantile_functor +{ + kolmogorov_smirnov_quantile_functor(const boost::math::kolmogorov_smirnov_distribution dist, RealType const& p) + : distribution(dist), prob(p) + { + } + + boost::math::tuple operator()(RealType const& x) + { + RealType fx = cdf(distribution, x) - prob; // Difference cdf - value - to minimize. + RealType dx = pdf(distribution, x); // pdf is 1st derivative. + // return both function evaluation difference f(x) and 1st derivative f'(x). + return boost::math::make_tuple(fx, dx); + } +private: + const boost::math::kolmogorov_smirnov_distribution distribution; + RealType prob; +}; + +template +struct kolmogorov_smirnov_complementary_quantile_functor +{ + kolmogorov_smirnov_complementary_quantile_functor(const boost::math::kolmogorov_smirnov_distribution dist, RealType const& p) + : distribution(dist), prob(p) + { + } + + boost::math::tuple operator()(RealType const& x) + { + RealType fx = cdf(complement(distribution, x)) - prob; // Difference cdf - value - to minimize. + RealType dx = -pdf(distribution, x); // pdf is the negative of the derivative of (1-CDF) + // return both function evaluation difference f(x) and 1st derivative f'(x). + return boost::math::make_tuple(fx, dx); + } +private: + const boost::math::kolmogorov_smirnov_distribution distribution; + RealType prob; +}; + +template +struct kolmogorov_smirnov_negative_pdf_functor +{ + RealType operator()(RealType const& x) { + if (2*x*x < constants::pi()) { + return -kolmogorov_smirnov_pdf_small_x(x, static_cast(1), Policy()); + } + return -kolmogorov_smirnov_pdf_large_x(x, static_cast(1), Policy()); + } +}; +} // namespace detail + +template +inline const std::pair range(const kolmogorov_smirnov_distribution& /*dist*/) +{ // Range of permissible values for random variable x. + using boost::math::tools::max_value; + return std::pair(static_cast(0), max_value()); +} + +template +inline const std::pair support(const kolmogorov_smirnov_distribution& /*dist*/) +{ // Range of supported values for random variable x. + // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. + // In the exact distribution, the upper limit would be 1. + using boost::math::tools::max_value; + return std::pair(static_cast(0), max_value()); +} + +template +inline RealType pdf(const kolmogorov_smirnov_distribution& dist, const RealType& x) +{ + BOOST_FPU_EXCEPTION_GUARD + BOOST_MATH_STD_USING // for ADL of std functions. + + RealType n = dist.number_of_observations(); + RealType error_result; + static const char* function = "boost::math::pdf(const kolmogorov_smirnov_distribution<%1%>&, %1%)"; + if(false == detail::check_x_not_NaN(function, x, &error_result, Policy())) + return error_result; + + if(false == detail::check_df(function, n, &error_result, Policy())) + return error_result; + + if (x < 0 || !(boost::math::isfinite)(x)) + { + return policies::raise_domain_error( + function, "Kolmogorov-Smirnov parameter was %1%, but must be > 0 !", x, Policy()); + } + + if (2*x*x*n < constants::pi()) { + return detail::kolmogorov_smirnov_pdf_small_x(x, n, Policy()); + } + + return detail::kolmogorov_smirnov_pdf_large_x(x, n, Policy()); +} // pdf + +template +inline RealType cdf(const kolmogorov_smirnov_distribution& dist, const RealType& x) +{ + BOOST_MATH_STD_USING // for ADL of std function exp. + static const char* function = "boost::math::cdf(const kolmogorov_smirnov_distribution<%1%>&, %1%)"; + RealType error_result; + RealType n = dist.number_of_observations(); + if(false == detail::check_x_not_NaN(function, x, &error_result, Policy())) + return error_result; + if(false == detail::check_df(function, n, &error_result, Policy())) + return error_result; + if((x < 0) || !(boost::math::isfinite)(x)) { + return policies::raise_domain_error( + function, "Random variable parameter was %1%, but must be between > 0 !", x, Policy()); + } + + if (x*x*n == 0) + return 0; + + return jacobi_theta4tau(RealType(0), 2*x*x*n/constants::pi(), Policy()); +} // cdf + +template +inline RealType cdf(const complemented2_type, RealType>& c) { + BOOST_MATH_STD_USING // for ADL of std function exp. + RealType x = c.param; + static const char* function = "boost::math::cdf(const complemented2_type&, %1%>)"; + RealType error_result; + kolmogorov_smirnov_distribution const& dist = c.dist; + RealType n = dist.number_of_observations(); + + if(false == detail::check_x_not_NaN(function, x, &error_result, Policy())) + return error_result; + if(false == detail::check_df(function, n, &error_result, Policy())) + return error_result; + + if((x < 0) || !(boost::math::isfinite)(x)) + return policies::raise_domain_error( + function, "Random variable parameter was %1%, but must be between > 0 !", x, Policy()); + + if (x*x*n == 0) + return 1; + + if (2*x*x*n > constants::pi()) + return -jacobi_theta4m1tau(RealType(0), 2*x*x*n/constants::pi(), Policy()); + + return RealType(1) - jacobi_theta4tau(RealType(0), 2*x*x*n/constants::pi(), Policy()); +} // cdf (complemented) + +template +inline RealType quantile(const kolmogorov_smirnov_distribution& dist, const RealType& p) +{ + BOOST_MATH_STD_USING + static const char* function = "boost::math::quantile(const kolmogorov_smirnov_distribution<%1%>&, %1%)"; + // Error check: + RealType error_result; + RealType n = dist.number_of_observations(); + if(false == detail::check_probability(function, p, &error_result, Policy())) + return error_result; + if(false == detail::check_df(function, n, &error_result, Policy())) + return error_result; + + RealType k = detail::kolmogorov_smirnov_quantile_guess(p) / sqrt(n); + const int get_digits = policies::digits();// get digits from policy, + boost::uintmax_t m = policies::get_max_root_iterations(); // and max iterations. + + return tools::newton_raphson_iterate(detail::kolmogorov_smirnov_quantile_functor(dist, p), + k, RealType(0), boost::math::tools::max_value(), get_digits, m); +} // quantile + +template +inline RealType quantile(const complemented2_type, RealType>& c) { + BOOST_MATH_STD_USING + static const char* function = "boost::math::quantile(const kolmogorov_smirnov_distribution<%1%>&, %1%)"; + kolmogorov_smirnov_distribution const& dist = c.dist; + RealType n = dist.number_of_observations(); + // Error check: + RealType error_result; + RealType p = c.param; + + if(false == detail::check_probability(function, p, &error_result, Policy())) + return error_result; + if(false == detail::check_df(function, n, &error_result, Policy())) + return error_result; + + RealType k = detail::kolmogorov_smirnov_quantile_guess(RealType(1-p)) / sqrt(n); + + const int get_digits = policies::digits();// get digits from policy, + boost::uintmax_t m = policies::get_max_root_iterations(); // and max iterations. + + return tools::newton_raphson_iterate( + detail::kolmogorov_smirnov_complementary_quantile_functor(dist, p), + k, RealType(0), boost::math::tools::max_value(), get_digits, m); +} // quantile (complemented) + +template +inline RealType mode(const kolmogorov_smirnov_distribution& dist) +{ + BOOST_MATH_STD_USING + static const char* function = "boost::math::mode(const kolmogorov_smirnov_distribution<%1%>&)"; + RealType n = dist.number_of_observations(); + RealType error_result; + if(false == detail::check_df(function, n, &error_result, Policy())) + return error_result; + + std::pair r = boost::math::tools::brent_find_minima( + detail::kolmogorov_smirnov_negative_pdf_functor(), + static_cast(0), static_cast(1), policies::digits()); + return r.first / sqrt(n); +} + +// Mean and variance come directly from +// https://www.jstatsoft.org/article/view/v008i18 Section 3 +template +inline RealType mean(const kolmogorov_smirnov_distribution& dist) +{ + BOOST_MATH_STD_USING + static const char* function = "boost::math::mean(const kolmogorov_smirnov_distribution<%1%>&)"; + RealType n = dist.number_of_observations(); + RealType error_result; + if(false == detail::check_df(function, n, &error_result, Policy())) + return error_result; + return constants::root_half_pi() * constants::ln_two() / sqrt(n); +} + +template +inline RealType variance(const kolmogorov_smirnov_distribution& dist) +{ + static const char* function = "boost::math::variance(const kolmogorov_smirnov_distribution<%1%>&)"; + RealType n = dist.number_of_observations(); + RealType error_result; + if(false == detail::check_df(function, n, &error_result, Policy())) + return error_result; + return (constants::pi_sqr_div_six() + - constants::pi() * constants::ln_two() * constants::ln_two()) / (2*n); +} + +// Skewness and kurtosis come from integrating the PDF +// The alternating series pops out a Dirichlet eta function which is related to the zeta function +template +inline RealType skewness(const kolmogorov_smirnov_distribution& dist) +{ + BOOST_MATH_STD_USING + static const char* function = "boost::math::skewness(const kolmogorov_smirnov_distribution<%1%>&)"; + RealType n = dist.number_of_observations(); + RealType error_result; + if(false == detail::check_df(function, n, &error_result, Policy())) + return error_result; + RealType ex3 = RealType(0.5625) * constants::root_half_pi() * constants::zeta_three() / n / sqrt(n); + RealType mean = boost::math::mean(dist); + RealType var = boost::math::variance(dist); + return (ex3 - 3 * mean * var - mean * mean * mean) / var / sqrt(var); +} + +template +inline RealType kurtosis(const kolmogorov_smirnov_distribution& dist) +{ + BOOST_MATH_STD_USING + static const char* function = "boost::math::kurtosis(const kolmogorov_smirnov_distribution<%1%>&)"; + RealType n = dist.number_of_observations(); + RealType error_result; + if(false == detail::check_df(function, n, &error_result, Policy())) + return error_result; + RealType ex4 = 7 * constants::pi_sqr_div_six() * constants::pi_sqr_div_six() / 20 / n / n; + RealType mean = boost::math::mean(dist); + RealType var = boost::math::variance(dist); + RealType skew = boost::math::skewness(dist); + return (ex4 - 4 * mean * skew * var * sqrt(var) - 6 * mean * mean * var - mean * mean * mean * mean) / var / var; +} + +template +inline RealType kurtosis_excess(const kolmogorov_smirnov_distribution& dist) +{ + static const char* function = "boost::math::kurtosis_excess(const kolmogorov_smirnov_distribution<%1%>&)"; + RealType n = dist.number_of_observations(); + RealType error_result; + if(false == detail::check_df(function, n, &error_result, Policy())) + return error_result; + return kurtosis(dist) - 3; +} +}} +#endif diff --git a/reporting/accuracy/plot_kolmogorov_smirnov_pdf.cpp b/reporting/accuracy/plot_kolmogorov_smirnov_pdf.cpp new file mode 100644 index 0000000000..04261a03ee --- /dev/null +++ b/reporting/accuracy/plot_kolmogorov_smirnov_pdf.cpp @@ -0,0 +1,38 @@ +// Copyright Evan Miller 2020 +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include + +using boost::math::tools::ulps_plot; + +int main() { + using PreciseReal = long double; + using CoarseReal = float; + + boost::math::kolmogorov_smirnov_distribution dist_coarse(10); + auto pdf_coarse = [&, dist_coarse](CoarseReal x) { + return boost::math::pdf(dist_coarse, x); + }; + boost::math::kolmogorov_smirnov_distribution dist_precise(10); + auto pdf_precise = [&, dist_precise](PreciseReal x) { + return boost::math::pdf(dist_precise, x); + }; + + int samples = 2500; + int width = 800; + PreciseReal clip = 100; + + std::string filename1 = "kolmogorov_smirnov_pdf_" + boost::core::demangle(typeid(CoarseReal).name()) + ".svg"; + auto plot1 = ulps_plot(pdf_precise, 0.0, 1.0, samples); + plot1.clip(clip).width(width); + std::string title1 = "Kolmogorov-Smirnov PDF (N=10) ULP plot at " + boost::core::demangle(typeid(CoarseReal).name()) + " precision"; + plot1.title(title1); + plot1.vertical_lines(10); + plot1.add_fn(pdf_coarse); + plot1.write(filename1); +} diff --git a/reporting/performance/test_distributions.cpp b/reporting/performance/test_distributions.cpp index ddd72ee96f..2b54d4760e 100644 --- a/reporting/performance/test_distributions.cpp +++ b/reporting/performance/test_distributions.cpp @@ -436,6 +436,16 @@ int main() test_boost_2_param(inverse_gaussian); + distribution_tester kolmogorov("KolmogorovSmirnov"); + kolmogorov.add_test_case(3, one_param_quantile >()); + kolmogorov.add_test_case(20, one_param_quantile >()); + kolmogorov.add_test_case(200, one_param_quantile >()); + kolmogorov.add_test_case(2000, one_param_quantile >()); + kolmogorov.add_test_case(20000, one_param_quantile >()); + kolmogorov.add_test_case(200000, one_param_quantile >()); + + test_boost_1_param(kolmogorov); + distribution_tester laplace("Laplace"); laplace.add_test_case(0, 1, two_param_quantile >()); laplace.add_test_case(20, 20, two_param_quantile >()); diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 9110a0130f..60dc9c154d 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -630,6 +630,7 @@ test-suite distribution_tests : [ run test_inverse_chi_squared_distribution.cpp ../../test/build//boost_unit_test_framework ] [ run test_inverse_gamma_distribution.cpp ../../test/build//boost_unit_test_framework ] [ run test_inverse_gaussian.cpp ../../test/build//boost_unit_test_framework ] + [ run test_kolmogorov_smirnov.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx11_hdr_initializer_list cxx11_auto_declarations cxx11_lambdas cxx11_unified_initialization_syntax cxx11_smart_ptr ] ] [ run test_laplace.cpp ../../test/build//boost_unit_test_framework ] [ run test_inv_hyp.cpp pch ../../test/build//boost_unit_test_framework ] [ run test_logistic_dist.cpp ../../test/build//boost_unit_test_framework ] diff --git a/test/compile_test/dist_kolmogorov_smirnov_incl_test.cpp b/test/compile_test/dist_kolmogorov_smirnov_incl_test.cpp new file mode 100644 index 0000000000..9bfe384ace --- /dev/null +++ b/test/compile_test/dist_kolmogorov_smirnov_incl_test.cpp @@ -0,0 +1,25 @@ +// Copyright Evan Miller 2020 +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Basic sanity check that header +// #includes all the files that it needs to. +// +#include +// +// Note this header includes no other headers, this is +// important if this test is to be meaningful: +// +#include "test_compile_result.hpp" + +void compile_and_link_test() +{ + TEST_DIST_FUNC(kolmogorov_smirnov) +} + +template class boost::math::kolmogorov_smirnov_distribution >; +template class boost::math::kolmogorov_smirnov_distribution >; +#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS +template class boost::math::kolmogorov_smirnov_distribution >; +#endif diff --git a/test/compile_test/distribution_concept_check.cpp b/test/compile_test/distribution_concept_check.cpp index 26b236a55c..fc8d2fab84 100644 --- a/test/compile_test/distribution_concept_check.cpp +++ b/test/compile_test/distribution_concept_check.cpp @@ -33,6 +33,7 @@ void instantiate(RealType) function_requires > >(); function_requires > >(); function_requires > >(); + function_requires > >(); function_requires > >(); function_requires > >(); function_requires > >(); diff --git a/test/compile_test/instantiate.hpp b/test/compile_test/instantiate.hpp index be9dae129c..16bd7ece0c 100644 --- a/test/compile_test/instantiate.hpp +++ b/test/compile_test/instantiate.hpp @@ -76,6 +76,7 @@ void instantiate(RealType) function_requires > >(); function_requires > >(); function_requires > >(); + function_requires > >(); function_requires > >(); function_requires > >(); function_requires > >(); @@ -111,6 +112,7 @@ void instantiate(RealType) function_requires > >(); function_requires > >(); function_requires > >(); + function_requires > >(); function_requires > >(); function_requires > >(); function_requires > >(); diff --git a/test/test_kolmogorov_smirnov.cpp b/test/test_kolmogorov_smirnov.cpp new file mode 100644 index 0000000000..1d949fd6a6 --- /dev/null +++ b/test/test_kolmogorov_smirnov.cpp @@ -0,0 +1,102 @@ +// Copyright Evan Miller 2020 +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) +// +#include +#include + +#define BOOST_TEST_MAIN +#include // for test_main +#include // for BOOST_CHECK_CLOSE +#include +#include + +template // Any floating-point type RealType. +void test_spots(RealType) +{ + using namespace boost::math; + // Test quantiles, CDFs, and complements + RealType eps = tools::epsilon(); + RealType tol = tools::epsilon() * 25; + for (int n=10; n<100; n += 10) { + kolmogorov_smirnov_distribution dist(n); + for (int i=0; i<1000; i++) { + RealType p = 1.0 * (i+1) / 1001; + RealType crit1 = quantile(dist, 1 - p); + RealType crit2 = quantile(complement(dist, p)); + RealType p1 = cdf(dist, crit1); + BOOST_CHECK_CLOSE_FRACTION(crit1, crit2, tol); + BOOST_CHECK_CLOSE_FRACTION(1 - p, p1, tol); + } + + for (int i=0; i<1000; i++) { + RealType x = 1.0 * (i+1) / 1001; + RealType p = cdf(dist, x); + RealType p1 = cdf(complement(dist, x)); + RealType x1; + if (p < 0.5) + x1 = quantile(dist, p); + else + x1 = quantile(complement(dist, p1)); + if (p > tol && p1 > tol) // skip the extreme tails + BOOST_CHECK_CLOSE_FRACTION(x, x1, tol); + } + } + + kolmogorov_smirnov_distribution dist(100); + + // Basics + BOOST_CHECK_THROW(pdf(dist, RealType(-1.0)), std::domain_error); + BOOST_CHECK_THROW(cdf(dist, RealType(-1.0)), std::domain_error); + BOOST_CHECK_THROW(quantile(dist, RealType(-1.0)), std::domain_error); + BOOST_CHECK_THROW(quantile(dist, RealType(2.0)), std::domain_error); + + // Confirm mode is at least a local minimum + RealType mode = boost::math::mode(dist); + + BOOST_TEST_CHECK(pdf(dist, mode) >= pdf(dist, mode - sqrt(eps))); + BOOST_TEST_CHECK(pdf(dist, mode) >= pdf(dist, mode + sqrt(eps))); + + // Test the moments - each one integrates the entire distribution + quadrature::exp_sinh integrator; + + auto f_one = [&, dist](RealType t) { return pdf(dist, t); }; + BOOST_CHECK_CLOSE_FRACTION(integrator.integrate(f_one, eps), RealType(1), tol); + + RealType mean = boost::math::mean(dist); + auto f_mean = [&, dist](RealType t) { return pdf(dist, t) * t; }; + BOOST_CHECK_CLOSE_FRACTION(integrator.integrate(f_mean, eps), mean, tol); + + RealType var = variance(dist); + auto f_var = [&, dist, mean](RealType t) { return pdf(dist, t) * (t - mean) * (t - mean); }; + BOOST_CHECK_CLOSE_FRACTION(integrator.integrate(f_var, eps), var, tol); + + RealType skew = skewness(dist); + auto f_skew = [&, dist, mean, var](RealType t) { return pdf(dist, t) + * (t - mean) * (t - mean) * (t - mean) / var / sqrt(var); }; + BOOST_CHECK_CLOSE_FRACTION(integrator.integrate(f_skew, eps), skew, 10*tol); + + RealType kurt = kurtosis(dist); + auto f_kurt= [&, dist, mean, var](RealType t) { return pdf(dist, t) + * (t - mean) * (t - mean) * (t - mean) * (t - mean) / var / var; }; + BOOST_CHECK_CLOSE_FRACTION(integrator.integrate(f_kurt, eps), kurt, 5*tol); + + BOOST_CHECK_CLOSE_FRACTION(kurt, kurtosis_excess(dist) + 3, eps); +} + +BOOST_AUTO_TEST_CASE( test_main ) +{ + BOOST_MATH_CONTROL_FP; + + // (Parameter value, arbitrarily zero, only communicates the floating point type). + test_spots(0.0F); // Test float. + test_spots(0.0); // Test double. +#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS + test_spots(0.0L); // Test long double. +#if !defined(BOOST_MATH_NO_REAL_CONCEPT_TESTS) + test_spots(boost::math::concepts::real_concept(0.)); // Test real concept. +#endif +#endif +}