Skip to content
Closed
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix up error messages in the MLUtilsSuite
  • Loading branch information
holdenk committed Apr 9, 2014
commit 150889c8c8a23a74ca63fbeaf09300de37712b0b
Original file line number Diff line number Diff line change
Expand Up @@ -120,23 +120,23 @@ class MLUtilsSuite extends FunSuite with LocalSparkContext {
for (seed <- 1 to 5) {
val foldedRdds = MLUtils.kFold(data, folds, seed)
assert(foldedRdds.size === folds)
foldedRdds.map{case (test, train) =>
foldedRdds.map { case (test, train) =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

case (validation, training)

val result = test.union(train).collect().sorted
val testSize = test.collect().size.toFloat
assert(testSize > 0, "Non empty test data")
assert(testSize > 0, "empty test data")
val p = 1 / folds.toFloat
// Within 3 standard deviations of the mean
val range = 3 * math.sqrt(100 * p * (1-p))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(1-p) -> (1 - p)

val expected = 100 * p
val lowerBound = expected - range
val upperBound = expected + range
assert(testSize > lowerBound,
"Test data (" + testSize + ") smaller than expected (" + lowerBound +")" )
s"Test data ($testSize) smaller than expected ($lowerBound)" )
assert(testSize < upperBound,
"Test data (" + testSize + ") larger than expected (" + upperBound +")" )
assert(train.collect().size > 0, "Non empty training data")
s"Test data ($testSize) larger than expected ($upperBound)" )
assert(train.collect().size > 0, "empty training data")
assert(result === collectedData,
"Each training+test set combined contains all of the data")
"Each training+test set combined should contain all of the data.")
}
// K fold cross validation should only have each element in the test set exactly once
assert(foldedRdds.map(_._1).reduce((x,y) => x.union(y)).collect().sorted ===
Expand Down