Rename KSTest and KSTestResult to KolmogorovSmirnovTest and KolmogorovSmirnovTestResult for consistency with the kolmogorovSmirnovTest method name

jose.cambronero · jose.cambronero · commit bbb30b1db0fc · 2015-07-10T17:56:08.000-07:00
diff --git a/docs/mllib-statistics.md b/docs/mllib-statistics.md
@@ -444,14 +444,14 @@ import org.apache.spark.mllib.stat.Statistics._
 val data: RDD[Double] = ... // an RDD of sample data
 
 // run a KS test for the sample versus a standard normal distribution
-val ksTestResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0, 1)
-println(ksTestResult) // summary of the test including the p-value, test statistic,
+val testResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0, 1)
+println(testResult) // summary of the test including the p-value, test statistic,
                       // and null hypothesis
                       // if our p-value indicates significance, we can reject the null hypothesis
 
 // perform a KS test using a cumulative distribution function of our making
 val myCDF: Double => Double = ...
-val ksTestResult = Statistics.kolmogorovSmirnovTest(data, myCDF)
+val testResult2 = Statistics.kolmogorovSmirnovTest(data, myCDF)
 {% endhighlight %}
 </div>
 </div>
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
@@ -25,7 +25,8 @@ import org.apache.spark.mllib.linalg.distributed.RowMatrix
 import org.apache.spark.mllib.linalg.{Matrix, Vector}
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.stat.correlation.Correlations
-import org.apache.spark.mllib.stat.test.{ChiSqTest, ChiSqTestResult, KSTest, KSTestResult}
+import org.apache.spark.mllib.stat.test.{ChiSqTest, ChiSqTestResult, KolmogorovSmirnovTest,
+  KolmogorovSmirnovTestResult}
 import org.apache.spark.rdd.RDD
 
 /**
@@ -171,11 +172,12 @@ object Statistics {
    *
    * @param data an `RDD[Double]` containing the sample of data to test
    * @param cdf a `Double => Double` function to calculate the theoretical CDF at a given value
-   * @return [[org.apache.spark.mllib.stat.test.KSTestResult]] object containing test statistic,
-   *        p-value, and null hypothesis.
+   * @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult]] object containing test
+   *        statistic, p-value, and null hypothesis.
    */
-  def kolmogorovSmirnovTest(data: RDD[Double], cdf: Double => Double): KSTestResult = {
-    KSTest.testOneSample(data, cdf)
+  def kolmogorovSmirnovTest(data: RDD[Double], cdf: Double => Double)
+    : KolmogorovSmirnovTestResult = {
+    KolmogorovSmirnovTest.testOneSample(data, cdf)
   }
 
   /**
@@ -186,11 +188,12 @@ object Statistics {
    * @param data an `RDD[Double]` containing the sample of data to test
    * @param distName a `String` name for a theoretical distribution
    * @param params `Double*` specifying the parameters to be used for the theoretical distribution
-   * @return [[org.apache.spark.mllib.stat.test.KSTestResult]] object containing test statistic,
-   *        p-value, and null hypothesis.
+   * @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult]] object containing test
+   *        statistic, p-value, and null hypothesis.
    */
   @varargs
-  def kolmogorovSmirnovTest(data: RDD[Double], distName: String, params: Double*): KSTestResult = {
-    KSTest.testOneSample(data, distName, params: _*)
+  def kolmogorovSmirnovTest(data: RDD[Double], distName: String, params: Double*)
+    : KolmogorovSmirnovTestResult = {
+    KolmogorovSmirnovTest.testOneSample(data, distName, params: _*)
   }
 }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala
@@ -26,7 +26,7 @@ import org.apache.spark.Logging
 import org.apache.spark.rdd.RDD
 
 /**
- * Conduct the two-sided Kolmogorov Smirnov test for data sampled from a
+ * Conduct the two-sided Kolmogorov Smirnov (KS) test for data sampled from a
  * continuous distribution. By comparing the largest difference between the empirical cumulative
  * distribution of the sample data and the theoretical distribution we can provide a test for the
- * the null hypothesis that the sample data comes from that theoretical distribution.
+ * null hypothesis that the sample data comes from that theoretical distribution.
@@ -47,7 +47,7 @@ import org.apache.spark.rdd.RDD
  * appropriate constant (the cumulative sum of number of elements in the prior partitions divided by
- * thedata set size). Finally, we take the maximum absolute value, and this is the statistic.
+ * the data set size). Finally, we take the maximum absolute value, and this is the statistic.
  */
-private[stat] object KSTest extends Logging {
+private[stat] object KolmogorovSmirnovTest extends Logging {
 
   // Null hypothesis for the type of KS test to be included in the result.
   object NullHypothesis extends Enumeration {
@@ -59,10 +59,10 @@ private[stat] object KSTest extends Logging {
    * Runs a KS test for 1 set of sample data, comparing it to a theoretical distribution
    * @param data `RDD[Double]` data on which to run test
    * @param cdf `Double => Double` function to calculate the theoretical CDF
-   * @return [[org.apache.spark.mllib.stat.test.KSTestResult]] summarizing the test results
-   *        (p-value, statistic, and null hypothesis)
+   * @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult]] summarizing the test
+   *        results (p-value, statistic, and null hypothesis)
    */
-  def testOneSample(data: RDD[Double], cdf: Double => Double): KSTestResult = {
+  def testOneSample(data: RDD[Double], cdf: Double => Double): KolmogorovSmirnovTestResult = {
     val n = data.count().toDouble
     val localData = data.sortBy(x => x).mapPartitions { part =>
       val partDiffs = oneSampleDifferences(part, n, cdf) // local distances
@@ -76,10 +76,10 @@ private[stat] object KSTest extends Logging {
    * Runs a KS test for 1 set of sample data, comparing it to a theoretical distribution
    * @param data `RDD[Double]` data on which to run test
    * @param distObj `RealDistribution` a theoretical distribution
-   * @return [[org.apache.spark.mllib.stat.test.KSTestResult]] summarizing the test results
-   *        (p-value, statistic, and null hypothesis)
+   * @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult]] summarizing the test
+   *        results (p-value, statistic, and null hypothesis)
    */
-  def testOneSample(data: RDD[Double], distObj: RealDistribution): KSTestResult = {
+  def testOneSample(data: RDD[Double], distObj: RealDistribution): KolmogorovSmirnovTestResult = {
     val cdf = (x: Double) => distObj.cumulativeProbability(x)
     testOneSample(data, cdf)
   }
@@ -158,11 +158,12 @@ private[stat] object KSTest extends Logging {
    * @param data the sample data that we wish to evaluate
    * @param distName the name of the theoretical distribution
    * @param params Variable length parameter for distribution's parameters
-   * @return [[org.apache.spark.mllib.stat.test.KSTestResult]] summarizing the test results
-   *        (p-value, statistic, and null hypothesis)
+   * @return [[org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult]] summarizing the
+   *        test results (p-value, statistic, and null hypothesis)
    */
   @varargs
-  def testOneSample(data: RDD[Double], distName: String, params: Double*): KSTestResult = {
+  def testOneSample(data: RDD[Double], distName: String, params: Double*)
+    : KolmogorovSmirnovTestResult = {
     val distObj =
       distName match {
         case "norm" => {
@@ -185,9 +186,9 @@ private[stat] object KSTest extends Logging {
     testOneSample(data, distObj)
   }
 
-  private def evalOneSampleP(ksStat: Double, n: Long): KSTestResult = {
+  private def evalOneSampleP(ksStat: Double, n: Long): KolmogorovSmirnovTestResult = {
     val pval = 1 - new KolmogorovSmirnovTest().cdf(ksStat, n.toInt)
-    new KSTestResult(pval, ksStat, NullHypothesis.OneSampleTwoSided.toString)
+    new KolmogorovSmirnovTestResult(pval, ksStat, NullHypothesis.OneSampleTwoSided.toString)
   }
 }
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
@@ -96,7 +96,7 @@ class ChiSqTestResult private[stat] (override val pValue: Double,
  * Object containing the test results for the Kolmogorov-Smirnov test.
  */
 @Experimental
-class KSTestResult private[stat] (
+class KolmogorovSmirnovTestResult private[stat] (
     override val pValue: Double,
     override val statistic: Double,
     override val nullHypothesis: String) extends TestResult[Int] {