Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Added API for RDD[Vector]
  • Loading branch information
dorx committed Jul 25, 2014
commit 706d436aea3db8b8cf15db0bcccb25e19c121a78
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,10 @@ object Statistics {
def chiSquared(x: RDD[Double], y: RDD[Double]): ChiSquaredTestResult =
  // Public entry point: hand both RDDs, untouched, to ChiSquaredTest and return its result.
  ChiSquaredTest.chiSquared(x, y)

/**
 * Forwards an RDD of vectors together with a method name to `ChiSquaredTest.chiSquared`.
 *
 * @param X RDD[Vector] handed unchanged to the delegate
 * @param method name of the test method, handed unchanged to the delegate
 * @return the ChiSquaredTestResult returned by the delegate
 * @throws IllegalArgumentException if the delegate does not recognize `method`
 */
def chiSquared(X: RDD[Vector], method: String): ChiSquaredTestResult =
  ChiSquaredTest.chiSquared(X, method)

/**
 * Forwards an RDD of vectors to the single-argument call `ChiSquaredTest.chiSquared(X)`,
 * leaving the choice of method to the delegate's default.
 *
 * @param X RDD[Vector] handed unchanged to the delegate
 * @return the ChiSquaredTestResult returned by the delegate
 */
def chiSquared(X: RDD[Vector]): ChiSquaredTestResult = {
  ChiSquaredTest.chiSquared(X)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.spark.mllib.stat.test

import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.Vector

private[stat] object ChiSquaredTest {

Expand All @@ -32,6 +33,16 @@ private[stat] object ChiSquaredTest {
}
}

/**
 * Dispatches a chi-squared test over an RDD of vectors to the implementation selected by
 * `method`.
 *
 * @param X input RDD[Vector], passed unchanged to the selected implementation
 * @param method name of the test method; defaults to `PEARSON`, the only value matched here
 * @return the ChiSquaredTestResult produced by the selected implementation
 * @throws IllegalArgumentException if `method` is not a recognized method name
 */
def chiSquared(X: RDD[Vector],
    method: String = PEARSON): ChiSquaredTestResult = {
  method match {
    // Upper-case identifier: compared by equality against the PEARSON constant rather than
    // bound as a fresh pattern variable.
    case PEARSON => chiSquaredPearson(X)
    case _ =>
      // Keep the original message as a prefix, then name the rejected and accepted values so
      // a caller's typo is visible directly in the exception.
      throw new IllegalArgumentException(
        s"Unrecognized method for Chi squared test. Received: '$method'; supported: '$PEARSON'.")
  }
}

/**
 * Pearson variant of the test for a pair of RDD[Double] inputs.
 *
 * Not implemented yet: the body is `???`, so any call throws `NotImplementedError`.
 */
private def chiSquaredPearson(x: RDD[Double], y: RDD[Double]): ChiSquaredTestResult = {
  ???
}

/**
 * Pearson variant of the test for a single RDD[Vector] input.
 *
 * Not implemented yet: the body is `???`, so any call throws `NotImplementedError`.
 */
private def chiSquaredPearson(X: RDD[Vector]): ChiSquaredTestResult = {
  ???
}

}