Skip to content
Closed
Prev Previous commit
Next Next commit
Generate extra IsNotNull predicate for null check
  • Loading branch information
wangshuo128 committed Sep 27, 2019
commit 30e2ac725c9d2acdace84e22701779bebb8d1de9
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution

import java.util.concurrent.TimeUnit._

import scala.collection.mutable
import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration.Duration

Expand Down Expand Up @@ -110,7 +111,6 @@ case class FilterExec(condition: Expression, child: SparkPlan)

// The columns that will filtered out by `IsNotNull` could be considered as not nullable.
private val notNullAttributes = notNullPreds.flatMap(_.references).distinct.map(_.exprId)
.diff(otherPreds.flatMap(_.references).distinct.map(_.exprId))

// Mark this as empty. We'll evaluate the input during doConsume(). We don't want to evaluate
// all the variables at the beginning to take advantage of short circuiting.
Expand Down Expand Up @@ -172,13 +172,23 @@ case class FilterExec(condition: Expression, child: SparkPlan)
// This is very perf sensitive.
// TODO: revisit this. We can consider reordering predicates as well.
val generatedIsNotNullChecks = new Array[Boolean](notNullPreds.length)

val extraIsNotNullReferences = mutable.Set[Attribute]()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: extraIsNotNullAttrs


def outputContainsNotNull(ref: Attribute): Boolean = {
Copy link
Contributor

@cloud-fan cloud-fan Sep 25, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method always return true, so we add IsNotNull for all attributes which is sub-optimal. We should check notNullAttributes.contains(ref.exprId)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, my mistake.

output.exists { attr => attr.exprId == ref.exprId }
}

val generated = otherPreds.map { c =>
val nullChecks = c.references.map { r =>
val idx = notNullPreds.indexWhere { n => n.asInstanceOf[IsNotNull].child.semanticEquals(r)}
if (idx != -1 && !generatedIsNotNullChecks(idx)) {
generatedIsNotNullChecks(idx) = true
// Use the child's output. The nullability is what the child produced.
genPredicate(notNullPreds(idx), input, child.output)
} else if (outputContainsNotNull(r) && !extraIsNotNullReferences.contains(r)) {
extraIsNotNullReferences += r
genPredicate(IsNotNull(r), input, child.output)
} else {
""
}
Expand Down