Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update logisticAggregatorSuite
  • Loading branch information
WeichenXu123 committed Aug 21, 2017
commit 0f28e5ea8fb3380d4be8b7771ebc69dd820419bd
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,17 @@ class LogisticAggregatorSuite extends SparkFunSuite with MLlibTestSparkContext {
val aggConstantFeature = getNewAggregator(instancesConstantFeature,
Vectors.dense(coefArray ++ interceptArray), fitIntercept = true, isMultinomial = true)
instances.foreach(aggConstantFeature.add)

// constant features should not affect gradient
assert(aggConstantFeature.gradient(0) === 0.0)
def validateGradient(grad: Vector): Unit = {
assert(grad(0) === 0.0)
grad.toArray.foreach { gradientValue =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with this test was that it checked that part of the gradient was zero, but didn't check that the rest of the gradient was correct. Here, you're checking that the rest of the gradient isn't NaN or infinite, but not that it's actually correct. A more appropriate test, IMO, is to also run an aggregator over the same instances with the constant feature filtered out, and then check that the portion of the gradient the two aggregators share is the same, e.g.:

    val aggConstantFeature = getNewAggregator(instancesConstantFeature,
      Vectors.dense(coefArray ++ interceptArray), fitIntercept = true, isMultinomial = true)
    val filteredInstances = instancesConstantFeature.map { case Instance(l, w, f) =>
      Instance(l, w, Vectors.dense(f.toArray.tail))
    }
    val aggMultinomial = getNewAggregator(filteredInstances,
      Vectors.dense(coefArray.slice(3, 6) ++ interceptArray), fitIntercept = true,
      isMultinomial = true)
    filteredInstances.foreach(aggMultinomial.add)
    instancesConstantFeature.foreach(aggConstantFeature.add)

    // constant features should not affect gradient
    assert(aggConstantFeature.gradient.toArray.take(numClasses) === Array.fill(numClasses)(0.0))
    assert(aggMultinomial.gradient.toArray === aggConstantFeature.gradient.toArray.slice(3, 9))

Note that this code is only an example; it is not meant to be copied and pasted.

assert(!gradientValue.isNaN &&
gradientValue > Double.NegativeInfinity && gradientValue < Double.PositiveInfinity)
}
}

validateGradient(aggConstantFeature.gradient)

val binaryCoefArray = Array(1.0, 2.0)
val intercept = 1.0
Expand All @@ -248,6 +257,6 @@ class LogisticAggregatorSuite extends SparkFunSuite with MLlibTestSparkContext {
isMultinomial = false)
instances.foreach(aggConstantFeatureBinary.add)
// constant features should not affect gradient
assert(aggConstantFeatureBinary.gradient(0) === 0.0)
validateGradient(aggConstantFeatureBinary.gradient)
}
}