diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala index 66a52942e668..272d36dd94ae 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala @@ -270,11 +270,13 @@ private[ml] class LogisticAggregator( val margins = new Array[Double](numClasses) features.foreachActive { (index, value) => - val stdValue = value / localFeaturesStd(index) - var j = 0 - while (j < numClasses) { - margins(j) += localCoefficients(index * numClasses + j) * stdValue - j += 1 + if (localFeaturesStd(index) != 0.0 && value != 0.0) { + val stdValue = value / localFeaturesStd(index) + var j = 0 + while (j < numClasses) { + margins(j) += localCoefficients(index * numClasses + j) * stdValue + j += 1 + } } } var i = 0 diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 0570499e7451..542977a48f0a 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -46,6 +46,7 @@ class LogisticRegressionSuite @transient var smallMultinomialDataset: Dataset[_] = _ @transient var binaryDataset: Dataset[_] = _ @transient var multinomialDataset: Dataset[_] = _ + @transient var multinomialDatasetWithZeroVar: Dataset[_] = _ private val eps: Double = 1e-5 override def beforeAll(): Unit = { @@ -99,6 +100,23 @@ class LogisticRegressionSuite df.cache() df } + + multinomialDatasetWithZeroVar = { + val nPoints = 100 + val coefficients = Array( + -0.57997, 0.912083, -0.371077, + -0.16624, -0.84355, -0.048509) + + val xMean = Array(5.843, 3.0) + val xVariance = Array(0.6856, 0.0) + + val testData = generateMultinomialLogisticInput( + coefficients, xMean, xVariance, addIntercept = true, nPoints, seed) + + val df = sc.parallelize(testData, 4).toDF().withColumn("weight", lit(1.0)) + df.cache() + df + } } /** @@ -112,6 +130,11 @@ class LogisticRegressionSuite multinomialDataset.rdd.map { case Row(label: Double, features: Vector, weight: Double) => label + "," + weight + "," + features.toArray.mkString(",") }.repartition(1).saveAsTextFile("target/tmp/LogisticRegressionSuite/multinomialDataset") + multinomialDatasetWithZeroVar.rdd.map { + case Row(label: Double, features: Vector, weight: Double) => + label + "," + weight + "," + features.toArray.mkString(",") + }.repartition(1) + .saveAsTextFile("target/tmp/LogisticRegressionSuite/multinomialDatasetWithZeroVar") } test("params") { @@ -1392,6 +1415,61 @@ class LogisticRegressionSuite assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) } + test("multinomial logistic regression with zero variance (SPARK-21681)") { + val sqlContext = multinomialDatasetWithZeroVar.sqlContext + import sqlContext.implicits._ + val mlr = new LogisticRegression().setFamily("multinomial").setFitIntercept(true) + .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(true).setWeightCol("weight") + + val model = mlr.fit(multinomialDatasetWithZeroVar) + + /* + Use the following R code to load the data and train the model using glmnet package. + + library("glmnet") + data <- read.csv("path", header=FALSE) + label = as.factor(data$V1) + w = data$V2 + features = as.matrix(data.frame(data$V3, data$V4)) + coefficients = coef(glmnet(features, label, weights=w, family="multinomial", + alpha = 0, lambda = 0)) + coefficients + $`0` + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + 0.2658824 + data.V3 0.1881871 + data.V4 . + + $`1` + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + 0.53604701 + data.V3 -0.02412645 + data.V4 . + + $`2` + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + -0.8019294 + data.V3 -0.1640607 + data.V4 . + */ + + val coefficientsR = new DenseMatrix(3, 2, Array( + 0.1881871, 0.0, + -0.02412645, 0.0, + -0.1640607, 0.0), isTransposed = true) + val interceptsR = Vectors.dense(0.2658824, 0.53604701, -0.8019294) + + model.coefficientMatrix.colIter.foreach(v => assert(v.toArray.sum ~== 0.0 absTol eps)) + + assert(model.coefficientMatrix ~== coefficientsR relTol 0.05) + assert(model.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) + assert(model.interceptVector ~== interceptsR relTol 0.05) + assert(model.interceptVector.toArray.sum ~== 0.0 absTol eps) + } + test("multinomial logistic regression with intercept without regularization with bound") { // Bound constrained optimization with bound on one side. val lowerBoundsOnCoefficients = Matrices.dense(3, 4, Array.fill(12)(1.0)) diff --git a/mllib/src/test/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregatorSuite.scala index 2b29c67d859d..16ef4af4f94e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregatorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregatorSuite.scala @@ -28,6 +28,7 @@ class LogisticAggregatorSuite extends SparkFunSuite with MLlibTestSparkContext { @transient var instances: Array[Instance] = _ @transient var instancesConstantFeature: Array[Instance] = _ + @transient var instancesConstantFeatureFiltered: Array[Instance] = _ override def beforeAll(): Unit = { super.beforeAll() @@ -41,6 +42,11 @@ class LogisticAggregatorSuite extends SparkFunSuite with MLlibTestSparkContext { Instance(1.0, 0.5, Vectors.dense(1.0, 1.0)), Instance(2.0, 0.3, Vectors.dense(1.0, 0.5)) ) + instancesConstantFeatureFiltered = Array( + Instance(0.0, 0.1, Vectors.dense(2.0)), + Instance(1.0, 0.5, Vectors.dense(1.0)), + Instance(2.0, 0.3, Vectors.dense(0.5)) + ) } /** Get summary statistics for some data and create a new LogisticAggregator. */ @@ -233,21 +239,44 @@ class LogisticAggregatorSuite extends SparkFunSuite with MLlibTestSparkContext { val binaryInstances = instancesConstantFeature.map { instance => if (instance.label <= 1.0) instance else Instance(0.0, instance.weight, instance.features) } + val binaryInstancesFiltered = instancesConstantFeatureFiltered.map { instance => + if (instance.label <= 1.0) instance else Instance(0.0, instance.weight, instance.features) + } val coefArray = Array(1.0, 2.0, -2.0, 3.0, 0.0, -1.0) + val coefArrayFiltered = Array(3.0, 0.0, -1.0) val interceptArray = Array(4.0, 2.0, -3.0) val aggConstantFeature = getNewAggregator(instancesConstantFeature, Vectors.dense(coefArray ++ interceptArray), fitIntercept = true, isMultinomial = true) - instances.foreach(aggConstantFeature.add) + val aggConstantFeatureFiltered = getNewAggregator(instancesConstantFeatureFiltered, + Vectors.dense(coefArrayFiltered ++ interceptArray), fitIntercept = true, isMultinomial = true) + + instancesConstantFeature.foreach(aggConstantFeature.add) + instancesConstantFeatureFiltered.foreach(aggConstantFeatureFiltered.add) + // constant features should not affect gradient - assert(aggConstantFeature.gradient(0) === 0.0) + def validateGradient(grad: Vector, gradFiltered: Vector, numCoefficientSets: Int): Unit = { + for (i <- 0 until numCoefficientSets) { + assert(grad(i) === 0.0) + assert(grad(numCoefficientSets + i) == gradFiltered(i)) + } + } + + validateGradient(aggConstantFeature.gradient, aggConstantFeatureFiltered.gradient, 3) val binaryCoefArray = Array(1.0, 2.0) + val binaryCoefArrayFiltered = Array(2.0) val intercept = 1.0 val aggConstantFeatureBinary = getNewAggregator(binaryInstances, Vectors.dense(binaryCoefArray ++ Array(intercept)), fitIntercept = true, isMultinomial = false) - instances.foreach(aggConstantFeatureBinary.add) + val aggConstantFeatureBinaryFiltered = getNewAggregator(binaryInstancesFiltered, + Vectors.dense(binaryCoefArrayFiltered ++ Array(intercept)), fitIntercept = true, + isMultinomial = false) + binaryInstances.foreach(aggConstantFeatureBinary.add) + binaryInstancesFiltered.foreach(aggConstantFeatureBinaryFiltered.add) + // constant features should not affect gradient - assert(aggConstantFeatureBinary.gradient(0) === 0.0) + validateGradient(aggConstantFeatureBinary.gradient, + aggConstantFeatureBinaryFiltered.gradient, 1) } }