Commit bd6bfac

PenguinToast authored and gatorsmile committed
[SPARK-24879][SQL] Fix NPE in Hive partition pruning filter pushdown
## What changes were proposed in this pull request?

We get an NPE when we have a filter on a partition column of the form `col in (x, null)`. This is due to the filter converter in HiveShim not handling `null`s correctly. This patch fixes this bug while still pushing down as much of the partition pruning predicates as possible, by filtering out `null`s from any `in` predicate. Since Hive only supports very simple partition pruning filters, this change should preserve correctness.

## How was this patch tested?

Unit tests, manual tests.

Author: William Sheu <[email protected]>

Closes #21832 from PenguinToast/partition-pruning-npe.

(cherry picked from commit bbd6f0c)
Signed-off-by: Xiao Li <[email protected]>
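As a rough illustration of the failing query shape described above, a repro might look like the following minimal sketch; the table `t`, data column `a`, and partition column `p` are illustrative names, not part of the patch.

```scala
// Minimal repro sketch, assuming a Hive-enabled SparkSession and an illustrative
// partitioned table; before this fix, the partition filter `p IN (1, NULL)` could
// hit an NPE while being converted into a Hive metastore pruning filter.
import org.apache.spark.sql.SparkSession

object Spark24879Repro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("SPARK-24879 repro")
      .enableHiveSupport() // requires Hive support on the classpath
      .getOrCreate()

    spark.sql("CREATE TABLE IF NOT EXISTS t (a INT) PARTITIONED BY (p INT)")
    // Filter on the partition column of the form `col in (x, null)`:
    spark.sql("SELECT * FROM t WHERE p IN (1, NULL)").show()

    spark.stop()
  }
}
```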
1 parent db1f3cc commit bd6bfac

2 files changed, +32 -1 lines changed


sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala

Lines changed: 18 additions & 1 deletion
@@ -599,6 +599,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {

     object ExtractableLiteral {
       def unapply(expr: Expression): Option[String] = expr match {
+        case Literal(null, _) => None // `null`s can be cast as other types; we want to avoid NPEs.
         case Literal(value, _: IntegralType) => Some(value.toString)
         case Literal(value, _: StringType) => Some(quoteStringLiteral(value.toString))
         case _ => None
@@ -607,7 +608,23 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {

     object ExtractableLiterals {
       def unapply(exprs: Seq[Expression]): Option[Seq[String]] = {
-        val extractables = exprs.map(ExtractableLiteral.unapply)
+        // SPARK-24879: The Hive metastore filter parser does not support "null", but we still want
+        // to push down as many predicates as we can while still maintaining correctness.
+        // In SQL, the `IN` expression evaluates as follows:
+        // > `1 in (2, NULL)` -> NULL
+        // > `1 in (1, NULL)` -> true
+        // > `1 in (2)` -> false
+        // Since Hive metastore filters are NULL-intolerant binary operations joined only by
+        // `AND` and `OR`, we can treat `NULL` as `false` and thus rewrite `1 in (2, NULL)` as
+        // `1 in (2)`.
+        // If the Hive metastore begins supporting NULL-tolerant predicates and Spark starts
+        // pushing down these predicates, then this optimization will become incorrect and need
+        // to be changed.
+        val extractables = exprs
+          .filter {
+            case Literal(null, _) => false
+            case _ => true
+          }.map(ExtractableLiteral.unapply)
         if (extractables.nonEmpty && extractables.forall(_.isDefined)) {
           Some(extractables.map(_.get))
         } else {
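Below is a small standalone sketch of the same null-dropping idea, using simplified types rather than Spark's actual `Expression`/`Literal` classes; `toMetastoreFilter` and the or-joined multi-value output are illustrative assumptions, while the single-value result matches the `(intcol = 1)` string the new test expects.

```scala
// Standalone sketch (not Spark's real converter): SQL NULL is modeled as None and
// dropped from the IN list before building the metastore filter string.
object InFilterSketch {
  def toMetastoreFilter(col: String, values: Seq[Option[Int]]): Option[String] = {
    val nonNull = values.flatten                  // treat NULL as false: drop it from the IN list
    if (nonNull.isEmpty) None                     // nothing pushable remains
    else Some(nonNull.map(v => s"$col = $v").mkString("(", " or ", ")"))
  }

  def main(args: Array[String]): Unit = {
    println(toMetastoreFilter("intcol", Seq(Some(1), None))) // Some((intcol = 1))
    println(toMetastoreFilter("intcol", Seq(None)))          // None (not pushed down)
  }
}
```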

sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala

Lines changed: 14 additions & 0 deletions
@@ -72,6 +72,20 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest {
     (Literal("p2\" and q=\"q2") === a("stringcol", StringType)) :: Nil,
     """stringcol = 'p1" and q="q1' and 'p2" and q="q2' = stringcol""")

+  filterTest("SPARK-24879 null literals should be ignored for IN constructs",
+    (a("intcol", IntegerType) in (Literal(1), Literal(null))) :: Nil,
+    "(intcol = 1)")
+
+  // Applying the predicate `x IN (NULL)` should return an empty set, but since this optimization
+  // will be applied by Catalyst, this filter converter does not need to account for this.
+  filterTest("SPARK-24879 IN predicates with only NULLs will not cause a NPE",
+    (a("intcol", IntegerType) in Literal(null)) :: Nil,
+    "")
+
+  filterTest("typecast null literals should not be pushed down in simple predicates",
+    (a("intcol", IntegerType) === Literal(null, IntegerType)) :: Nil,
+    "")
+
   private def filterTest(name: String, filters: Seq[Expression], result: String) = {
     test(name) {
       withSQLConf(SQLConf.ADVANCED_PARTITION_PREDICATE_PUSHDOWN.key -> "true") {
