@@ -26,7 +26,7 @@ import org.apache.spark.Logging
2626import org .apache .spark .rdd .RDD
2727
2828/**
29- * Conduct the two-sided Kolmogorov Smirnov test for data sampled from a
29+ * Conduct the two-sided Kolmogorov Smirnov (KS) test for data sampled from a
3030 * continuous distribution. By comparing the largest difference between the empirical cumulative
3131 * distribution of the sample data and the theoretical distribution we can provide a test for the
3232 * null hypothesis that the sample data comes from that theoretical distribution.
@@ -47,7 +47,7 @@ import org.apache.spark.rdd.RDD
4747 * appropriate constant (the cumulative sum of number of elements in the prior partitions divided by
4848 * the data set size). Finally, we take the maximum absolute value, and this is the statistic.
4949 */
50- private [stat] object KSTest extends Logging {
50+ private [stat] object KolmogorovSmirnovTest extends Logging {
5151
5252 // Null hypothesis for the type of KS test to be included in the result.
5353 object NullHypothesis extends Enumeration {
@@ -59,10 +59,10 @@ private[stat] object KSTest extends Logging {
5959 * Runs a KS test for 1 set of sample data, comparing it to a theoretical distribution
6060 * @param data `RDD[Double]` data on which to run test
6161 * @param cdf `Double => Double` function to calculate the theoretical CDF
62- * @return [[org.apache.spark.mllib.stat.test.KSTestResult ]] summarizing the test results
63- * (p-value, statistic, and null hypothesis)
62+ * @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult ]] summarizing the test
63+ * results (p-value, statistic, and null hypothesis)
6464 */
65- def testOneSample (data : RDD [Double ], cdf : Double => Double ): KSTestResult = {
65+ def testOneSample (data : RDD [Double ], cdf : Double => Double ): KolmogorovSmirnovTestResult = {
6666 val n = data.count().toDouble
6767 val localData = data.sortBy(x => x).mapPartitions { part =>
6868 val partDiffs = oneSampleDifferences(part, n, cdf) // local distances
@@ -76,10 +76,10 @@ private[stat] object KSTest extends Logging {
7676 * Runs a KS test for 1 set of sample data, comparing it to a theoretical distribution
7777 * @param data `RDD[Double]` data on which to run test
7878 * @param distObj `RealDistribution` a theoretical distribution
79- * @return [[org.apache.spark.mllib.stat.test.KSTestResult ]] summarizing the test results
80- * (p-value, statistic, and null hypothesis)
79+ * @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult ]] summarizing the test
80+ * results (p-value, statistic, and null hypothesis)
8181 */
82- def testOneSample (data : RDD [Double ], distObj : RealDistribution ): KSTestResult = {
82+ def testOneSample (data : RDD [Double ], distObj : RealDistribution ): KolmogorovSmirnovTestResult = {
8383 val cdf = (x : Double ) => distObj.cumulativeProbability(x)
8484 testOneSample(data, cdf)
8585 }
@@ -158,11 +158,12 @@ private[stat] object KSTest extends Logging {
158158 * @param data the sample data that we wish to evaluate
159159 * @param distName the name of the theoretical distribution
160160 * @param params Variable length parameter for distribution's parameters
161- * @return [[org.apache.spark.mllib.stat.test.KSTestResult ]] summarizing the test results
162- * (p-value, statistic, and null hypothesis)
161+ * @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult ]] summarizing the
162+ * test results (p-value, statistic, and null hypothesis)
163163 */
164164 @ varargs
165- def testOneSample (data : RDD [Double ], distName : String , params : Double * ): KSTestResult = {
165+ def testOneSample (data : RDD [Double ], distName : String , params : Double * )
166+ : KolmogorovSmirnovTestResult = {
166167 val distObj =
167168 distName match {
168169 case " norm" => {
@@ -185,9 +186,9 @@ private[stat] object KSTest extends Logging {
185186 testOneSample(data, distObj)
186187 }
187188
188- private def evalOneSampleP (ksStat : Double , n : Long ): KSTestResult = {
189+ private def evalOneSampleP (ksStat : Double , n : Long ): KolmogorovSmirnovTestResult = {
189190 val pval = 1 - new KolmogorovSmirnovTest ().cdf(ksStat, n.toInt)
190- new KSTestResult (pval, ksStat, NullHypothesis .OneSampleTwoSided .toString)
191+ new KolmogorovSmirnovTestResult (pval, ksStat, NullHypothesis .OneSampleTwoSided .toString)
191192 }
192193}
193194
0 commit comments