Skip to content
Closed
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix the names in kFold
  • Loading branch information
holdenk committed Apr 9, 2014
commit 2cb90b32090f3b9f52bfc15ab79c994ef63e670a
15 changes: 8 additions & 7 deletions mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -180,16 +180,17 @@ object MLUtils {

/**
* Return a k element list of pairs of RDDs with the first element of each pair
* containing a unique 1/Kth of the data and the second element contain the compliment of that.
* containing the validation data, a unique 1/Kth of the data, and the second
* element, the training data, containing the complement of that.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

list -> array
compliment -> complement

*/
def kFold[T : ClassTag](rdd: RDD[T], numFolds: Int, seed: Int): List[Pair[RDD[T], RDD[T]]] = {
def kFold[T : ClassTag](rdd: RDD[T], numFolds: Int, seed: Int): Array[(RDD[T], RDD[T])] = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

T : -> T: (remove the space in the middle)

val numFoldsF = numFolds.toFloat
(1 to numFolds).map { fold =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

map { -> map { (remove one space).

val sampler = new BernoulliSampler[T]((fold-1)/numFoldsF,fold/numFoldsF, complement = false)
val train = new PartitionwiseSampledRDD(rdd, sampler, seed)
val test = new PartitionwiseSampledRDD(rdd, sampler.cloneComplement(), seed)
(train, test)
}.toList
val sampler = new BernoulliSampler[T]((fold - 1) / numFoldsF, fold / numFoldsF, complement = false)
val validation = new PartitionwiseSampledRDD(rdd, sampler, seed)
val training = new PartitionwiseSampledRDD(rdd, sampler.cloneComplement(), seed)
(validation, training)
}.toArray
}

/**
Expand Down