From f786f184f7b32d738ad2b946e12d6f6e4ed4b512 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 20 Dec 2020 23:57:15 +0800 Subject: [PATCH 1/6] Simplify conditional in predicate --- .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../sql/catalyst/optimizer/expressions.scala | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index fdb9c5b4821d..bda2c94ccb26 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -103,6 +103,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveDispensableExpressions, SimplifyBinaryComparison, ReplaceNullWithFalseInPredicate, + SimplifyConditionalInPredicate, PruneFilters, SimplifyCasts, SimplifyCaseConversionExpressions, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index e6730c9275a1..668b6b31e052 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /* * Optimization rules defined in this file should not affect the structure of the logical plan. 
@@ -570,6 +571,33 @@ object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper { } +object SimplifyConditionalInPredicate extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case f @ Filter(cond, _) => f.copy(condition = simplifyConditional(cond)) + } + + private def simplifyConditional(e: Expression): Expression = e match { + case cw @ CaseWhen(branches, elseValue) if cw.dataType == BooleanType && branches.size == 1 && + elseValue.forall(_.semanticEquals(FalseLiteral)) => + val (whenVal, thenVal) = branches.head + And(whenVal, thenVal) + case i @ If(pred, trueVal, FalseLiteral) if i.dataType == BooleanType => + And(pred, trueVal) + case e if e.dataType == BooleanType => + e + case e => + val message = "Expected a Boolean type expression in simplifyConditional, " + + s"but got the type `${e.dataType.catalogString}` in `${e.sql}`." + if (Utils.isTesting) { + throw new IllegalArgumentException(message) + } else { + logWarning(message) + e + } + } +} + + /** * Simplifies LIKE expressions that do not need full regular expressions to evaluate the condition. 
* For example, when the expression is just checking to see if a string starts with a given From 448faf0c904950aa55cc743fac160f98fa04c438 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 23 Dec 2020 00:18:50 +0800 Subject: [PATCH 2/6] fix --- .../sql/catalyst/optimizer/Optimizer.scala | 2 +- .../SimplifyConditionalsInPredicate.scala | 82 +++++++++++++++++++ .../sql/catalyst/optimizer/expressions.scala | 28 ------- 3 files changed, 83 insertions(+), 29 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index bda2c94ccb26..daaa7841fa05 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -103,7 +103,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveDispensableExpressions, SimplifyBinaryComparison, ReplaceNullWithFalseInPredicate, - SimplifyConditionalInPredicate, + SimplifyConditionalsInPredicate, PruneFilters, SimplifyCasts, SimplifyCaseConversionExpressions, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala new file mode 100644 index 000000000000..d21da8e61b37 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, CaseWhen, Expression, If, LambdaFunction, Literal, MapFilter, Not, Or} +import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.types.BooleanType +import org.apache.spark.util.Utils + + +object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { + + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case f @ Filter(cond, _) => f.copy(condition = simplifyConditional(cond)) + case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(simplifyConditional(cond))) + case d @ DeleteFromTable(_, Some(cond)) => d.copy(condition = Some(simplifyConditional(cond))) + case u @ UpdateTable(_, _, Some(cond)) => u.copy(condition = Some(simplifyConditional(cond))) + case p: LogicalPlan => p transformExpressions { + case i @ If(pred, _, _) => i.copy(predicate = simplifyConditional(pred)) + case cw @ CaseWhen(branches, _) => + val newBranches = branches.map { case (cond, value) => + simplifyConditional(cond) -> value + } + cw.copy(branches = newBranches) + case af @ ArrayFilter(_, lf @ LambdaFunction(func, _, _)) => + val newLambda = lf.copy(function = simplifyConditional(func)) + 
af.copy(function = newLambda) + case ae @ ArrayExists(_, lf @ LambdaFunction(func, _, _), false) => + val newLambda = lf.copy(function = simplifyConditional(func)) + ae.copy(function = newLambda) + case mf @ MapFilter(_, lf @ LambdaFunction(func, _, _)) => + val newLambda = lf.copy(function = simplifyConditional(func)) + mf.copy(function = newLambda) + } + } + + private def simplifyConditional(e: Expression): Expression = e match { + case Literal(null, BooleanType) => FalseLiteral + case And(left, right) => And(simplifyConditional(left), simplifyConditional(right)) + case Or(left, right) => Or(simplifyConditional(left), simplifyConditional(right)) + case If(cond, t, FalseLiteral) => And(cond, t) + case If(cond, t, TrueLiteral) => Or(Not(cond), t) + case If(cond, FalseLiteral, f) => And(Not(cond), f) + case If(cond, TrueLiteral, f) => Or(cond, f) + case CaseWhen(Seq((cond, trueValue)), + Some(FalseLiteral) | Some(Literal(null, BooleanType)) | None) => + And(cond, trueValue) + case CaseWhen(Seq((cond, trueValue)), Some(TrueLiteral)) => + Or(Not(cond), trueValue) + case CaseWhen(Seq((cond, FalseLiteral)), elseValue) => + And(Not(cond), elseValue.getOrElse(Literal(null, BooleanType))) + case CaseWhen(Seq((cond, TrueLiteral)), elseValue) => + Or(cond, elseValue.getOrElse(Literal(null, BooleanType))) + case e if e.dataType == BooleanType => e + case e => + val message = "Expected a Boolean type expression in simplifyConditional, " + + s"but got the type `${e.dataType.catalogString}` in `${e.sql}`." 
+ if (Utils.isTesting) { + throw new IllegalArgumentException(message) + } else { + logWarning(message) + e + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 6a7f8dfd51e1..47b968f6ebdd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils /* * Optimization rules defined in this file should not affect the structure of the logical plan. @@ -586,33 +585,6 @@ object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper { } -object SimplifyConditionalInPredicate extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case f @ Filter(cond, _) => f.copy(condition = simplifyConditional(cond)) - } - - private def simplifyConditional(e: Expression): Expression = e match { - case cw @ CaseWhen(branches, elseValue) if cw.dataType == BooleanType && branches.size == 1 && - elseValue.forall(_.semanticEquals(FalseLiteral)) => - val (whenVal, thenVal) = branches.head - And(whenVal, thenVal) - case i @ If(pred, trueVal, FalseLiteral) if i.dataType == BooleanType => - And(pred, trueVal) - case e if e.dataType == BooleanType => - e - case e => - val message = "Expected a Boolean type expression in simplifyConditional, " + - s"but got the type `${e.dataType.catalogString}` in `${e.sql}`." - if (Utils.isTesting) { - throw new IllegalArgumentException(message) - } else { - logWarning(message) - e - } - } -} - - /** * Simplifies LIKE expressions that do not need full regular expressions to evaluate the condition. 
* For example, when the expression is just checking to see if a string starts with a given From c90227a9dfbfc47efb1f2f399c50baf74d89be30 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 23 Dec 2020 14:35:48 +0800 Subject: [PATCH 3/6] fix --- .../SimplifyConditionalsInPredicate.scala | 19 +- ...SimplifyConditionalsInPredicateSuite.scala | 229 ++++++++++++++++++ .../q34.sf100/explain.txt | 12 +- .../q34.sf100/simplified.txt | 2 +- .../approved-plans-modified/q34/explain.txt | 8 +- .../q34/simplified.txt | 2 +- .../q73.sf100/explain.txt | 8 +- .../q73.sf100/simplified.txt | 2 +- .../approved-plans-modified/q73/explain.txt | 8 +- .../q73/simplified.txt | 2 +- .../approved-plans-v1_4/q34.sf100/explain.txt | 12 +- .../q34.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q34/explain.txt | 8 +- .../approved-plans-v1_4/q34/simplified.txt | 2 +- .../approved-plans-v1_4/q73.sf100/explain.txt | 12 +- .../q73.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q73/explain.txt | 8 +- .../approved-plans-v1_4/q73/simplified.txt | 2 +- .../approved-plans-v2_7/q34.sf100/explain.txt | 12 +- .../q34.sf100/simplified.txt | 2 +- .../approved-plans-v2_7/q34/explain.txt | 8 +- .../approved-plans-v2_7/q34/simplified.txt | 2 +- 22 files changed, 288 insertions(+), 76 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala index d21da8e61b37..3a7b51f772a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.optimizer -import 
org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, CaseWhen, Expression, If, LambdaFunction, Literal, MapFilter, Not, Or} +import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, Literal, Not, Or} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule @@ -32,23 +32,6 @@ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(simplifyConditional(cond))) case d @ DeleteFromTable(_, Some(cond)) => d.copy(condition = Some(simplifyConditional(cond))) case u @ UpdateTable(_, _, Some(cond)) => u.copy(condition = Some(simplifyConditional(cond))) - case p: LogicalPlan => p transformExpressions { - case i @ If(pred, _, _) => i.copy(predicate = simplifyConditional(pred)) - case cw @ CaseWhen(branches, _) => - val newBranches = branches.map { case (cond, value) => - simplifyConditional(cond) -> value - } - cw.copy(branches = newBranches) - case af @ ArrayFilter(_, lf @ LambdaFunction(func, _, _)) => - val newLambda = lf.copy(function = simplifyConditional(func)) - af.copy(function = newLambda) - case ae @ ArrayExists(_, lf @ LambdaFunction(func, _, _), false) => - val newLambda = lf.copy(function = simplifyConditional(func)) - ae.copy(function = newLambda) - case mf @ MapFilter(_, lf @ LambdaFunction(func, _, _)) => - val newLambda = lf.copy(function = simplifyConditional(func)) - mf.copy(function = newLambda) - } } private def simplifyConditional(e: Expression): Expression = e match { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala new file mode 100644 index 000000000000..01b6d1b990ca --- /dev/null +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, IsNotNull, Literal, Or} +import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} +import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable} +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.types.{BooleanType, IntegerType} + +class SimplifyConditionalsInPredicateSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("SimplifyConditionalsInPredicate", FixedPoint(10), + NullPropagation, + ConstantFolding, + BooleanSimplification, + SimplifyConditionals, + 
SimplifyConditionalsInPredicate) :: Nil + } + + private val testRelation = + LocalRelation('i.int, 'b.boolean, 'a.array(IntegerType), 'm.map(IntegerType, IntegerType)) + private val anotherTestRelation = LocalRelation('d.int) + + test("if(cond, trueVal, false) => And(cond, trueVal)") { + val originalCond = If( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b"), + FalseLiteral) + val expectedCond = And( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("if(cond, trueVal, true) => or(not(cond), trueVal)") { + val originalCond = If( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b"), + TrueLiteral) + val expectedCond = Or( + UnresolvedAttribute("i") <= Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("if(cond, false, falseVal) => and(not(cond), falseVal)") { + val originalCond = If( + UnresolvedAttribute("i") > Literal(10), + FalseLiteral, + UnresolvedAttribute("b")) + val expectedCond = And( + UnresolvedAttribute("i") <= Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("if(cond, true, falseVal) => or(cond, falseVal)") { + val originalCond = If( + 
UnresolvedAttribute("i") > Literal(10), + TrueLiteral, + UnresolvedAttribute("b")) + val expectedCond = Or( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + + test("case when cond then trueVal else false end => And(cond, trueVal)") { + Seq(Some(FalseLiteral), None, Some(Literal(null, BooleanType))).foreach { elseExp => + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"))), + elseExp) + val expectedCond = And( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + } + + test("case when cond then trueVal else true end => or(not(cond), trueVal)") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"))), + TrueLiteral) + val expectedCond = Or( + UnresolvedAttribute("i") <= Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("case when cond then false else elseValue end => and(not(cond), elseValue)") { + Seq() + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), FalseLiteral)), + UnresolvedAttribute("b")) + val expectedCond = And( + UnresolvedAttribute("i") <= 
Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("case when cond then true else elseValue end => or(cond, elseValue)") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral)), + UnresolvedAttribute("b")) + val expectedCond = Or( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("case when cond then true end => or(cond, null)") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral))) + val expectedCond = UnresolvedAttribute("i") > Literal(10) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("Simplify conditional in conditions of CaseWhen inside another CaseWhen") { + val nestedCaseWhen = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10)) -> UnresolvedAttribute("b")), + FalseLiteral) + val originalCond = CaseWhen(Seq(IsNotNull(nestedCaseWhen) -> FalseLiteral)) + val expectedCond = FalseLiteral + + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, 
expectedExpr = originalCond) + } + + test("Not expected type - simplifyConditional") { + val e = intercept[AnalysisException] { + testFilter(originalCond = Literal(null, IntegerType), expectedCond = FalseLiteral) + }.getMessage + assert(e.contains("'CAST(NULL AS INT)' of type int is not a boolean")) + } + + private def testFilter(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, exp) => rel.where(exp), originalCond, expectedCond) + } + + private def testJoin(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, exp) => rel.join(anotherTestRelation, Inner, Some(exp)), originalCond, expectedCond) + } + + private def testProjection(originalExpr: Expression, expectedExpr: Expression): Unit = { + test((rel, exp) => rel.select(exp), originalExpr, expectedExpr) + } + + private def testDelete(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, expr) => DeleteFromTable(rel, Some(expr)), originalCond, expectedCond) + } + + private def testUpdate(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, expr) => UpdateTable(rel, Seq.empty, Some(expr)), originalCond, expectedCond) + } + + private def test( + func: (LogicalPlan, Expression) => LogicalPlan, + originalExpr: Expression, + expectedExpr: Expression): Unit = { + + val originalPlan = func(testRelation, originalExpr).analyze + val optimizedPlan = Optimize.execute(originalPlan) + val expectedPlan = func(testRelation, expectedExpr).analyze + comparePlans(optimizedPlan, expectedPlan) + } +} diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt index ac1fca4f67a0..547806128e64 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt @@ 
-120,7 +120,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -128,7 +128,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -156,7 +156,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate 
[codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -171,7 +171,7 @@ Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) (29) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] (30) Sort [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] @@ -193,7 +193,7 @@ Condition : isnotnull(c_customer_sk#24) (34) Exchange Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#29] (35) Sort [codegen id : 8] Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] @@ -210,7 +210,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (38) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), true, [id=#30] +Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 10] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt index d9b416ddba9e..c9945cda6774 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt @@ -47,7 +47,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt index 898d37403d6a..74bbb52c55fb 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : 
((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt index 5af07f1d4dde..4484587f6535 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt index 25da173c8ecd..51b480ef64ab 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) 
Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.0)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (35) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt index 
7496388d3430..8695f9da1711 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt @@ -44,7 +44,7 @@ WholeStageCodegen (7) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt index e420b656c3ad..56ad4f4d926e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16]
-Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.0)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt index 46b724156571..5e49f6cb603d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt index 17bb0e7e71d2..6fa9bb85f0b7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt @@ -120,7 +120,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -128,7 +128,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 
> 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -156,7 +156,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -171,7 +171,7 @@ Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) (29) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] (30) Sort [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] @@ -193,7 +193,7 @@ Condition : isnotnull(c_customer_sk#24) (34) Exchange Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#29] (35) Sort [codegen id : 8] Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] @@ -210,7 +210,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (38) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, 
c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), true, [id=#30] +Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 10] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt index d9b416ddba9e..c9945cda6774 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt @@ -47,7 +47,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt index 18f465caea20..1aea77422b14 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, 
hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, 
c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt index 5af07f1d4dde..4484587f6535 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt index 4af604ca3f65..f88f1f48ac2b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt @@ -120,7 +120,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, 
ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -128,7 +128,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.0)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -156,7 +156,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ 
-171,7 +171,7 @@ Condition : ((cnt#22 >= 1) AND (cnt#22 <= 5)) (29) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] (30) Sort [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] @@ -193,7 +193,7 @@ Condition : isnotnull(c_customer_sk#24) (34) Exchange Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#29] (35) Sort [codegen id : 8] Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] @@ -210,7 +210,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (38) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#30] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 10] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt index af8527f155c8..9de2f2ab4cd6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt @@ -47,7 +47,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter 
[hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt index f4565c3edb17..43c73f3c7af6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR 
(hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.0)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt index 46b724156571..5e49f6cb603d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics 
[hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt index c7b8685b64be..5d8f0d04161b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt @@ -120,7 +120,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -128,7 +128,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / 
cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -156,7 +156,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -171,7 +171,7 @@ Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) (29) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] (30) Sort [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] @@ -193,7 +193,7 @@ Condition : isnotnull(c_customer_sk#24) (34) Exchange Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#29] (35) Sort [codegen id : 8] Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] @@ -210,7 +210,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (38) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), true, [id=#30] +Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, 
c_preferred_cust_flag#28 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 10] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt index 451659e2c617..244478fd6882 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt @@ -47,7 +47,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt index 01b5f46bd5dd..e588993073a9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), 
IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, ss_ticket_number#5 ASC 
NULLS FIRST, 5), true, [id=#29] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt index 8aa32fed5a17..22cab3a42862 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] From cccbaf17813b68a93e1c982397bcf5477b75052e Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 24 Dec 2020 08:16:59 +0800 Subject: [PATCH 4/6] fix --- .../SimplifyConditionalsInPredicate.scala | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala index 3a7b51f772a6..987fda5b771c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala @@ -22,9 +22,27 @@ import 
org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLite import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.BooleanType -import org.apache.spark.util.Utils - +/** + * A rule that converting conditional expressions to predicate expressions, if possible, in the + * search condition of the WHERE/HAVING/ON(JOIN) clauses, which contain an implicit Boolean operator + * "(search condition) = TRUE". After this converting, we can potentially push the filter down to + * the data source. + * + * Supported cases are: + * - IF(cond, trueVal, false) => AND(cond, trueVal) + * - IF(cond, trueVal, true) => OR(NOT(cond), trueVal) + * - IF(cond, false, falseVal) => AND(NOT(cond), falseVal) + * - IF(cond, true, falseVal) => OR(cond, falseVal) + * - CASE WHEN cond THEN trueVal ELSE false END => AND(cond, trueVal) + * - CASE WHEN cond THEN trueVal END => AND(cond, trueVal) + * - CASE WHEN cond THEN trueVal ELSE null END => AND(cond, trueVal) + * - CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal) + * - CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal) + * - CASE WHEN cond THEN false END => AND(NOT(cond), false) + * - CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal) + * - CASE WHEN cond THEN true END => OR(cond, false) + */ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { @@ -35,31 +53,26 @@ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { } private def simplifyConditional(e: Expression): Expression = e match { - case Literal(null, BooleanType) => FalseLiteral case And(left, right) => And(simplifyConditional(left), simplifyConditional(right)) case Or(left, right) => Or(simplifyConditional(left), simplifyConditional(right)) - case If(cond, t, FalseLiteral) => And(cond, t) - case If(cond, t, TrueLiteral) => Or(Not(cond), t) - case If(cond, 
FalseLiteral, f) => And(Not(cond), f) - case If(cond, TrueLiteral, f) => Or(cond, f) + case If(cond, trueValue, FalseLiteral) => And(cond, trueValue) + case If(cond, trueValue, TrueLiteral) => Or(Not(cond), trueValue) + case If(cond, FalseLiteral, falseValue) => And(Not(cond), falseValue) + case If(cond, TrueLiteral, falseValue) => Or(cond, falseValue) case CaseWhen(Seq((cond, trueValue)), Some(FalseLiteral) | Some(Literal(null, BooleanType)) | None) => And(cond, trueValue) case CaseWhen(Seq((cond, trueValue)), Some(TrueLiteral)) => Or(Not(cond), trueValue) case CaseWhen(Seq((cond, FalseLiteral)), elseValue) => - And(Not(cond), elseValue.getOrElse(Literal(null, BooleanType))) + And(Not(cond), elseValue.getOrElse(FalseLiteral)) case CaseWhen(Seq((cond, TrueLiteral)), elseValue) => - Or(cond, elseValue.getOrElse(Literal(null, BooleanType))) + Or(cond, elseValue.getOrElse(FalseLiteral)) case e if e.dataType == BooleanType => e case e => - val message = "Expected a Boolean type expression in simplifyConditional, " + - s"but got the type `${e.dataType.catalogString}` in `${e.sql}`." 
- if (Utils.isTesting) { - throw new IllegalArgumentException(message) - } else { - logWarning(message) - e - } + assert(e.dataType != BooleanType, + "Expected a Boolean type expression in simplifyConditional, " + + s"but got the type `${e.dataType.catalogString}` in `${e.sql}`.") + e } } From 8bd9ef9e3136e67a49dbbfe8574725dfe9f38c94 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 24 Dec 2020 11:13:39 +0800 Subject: [PATCH 5/6] further optimize --- .../SimplifyConditionalsInPredicate.scala | 18 +++++++---- ...SimplifyConditionalsInPredicateSuite.scala | 32 ++++++++++++------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala index 987fda5b771c..59a0abf7b7d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala @@ -39,9 +39,9 @@ import org.apache.spark.sql.types.BooleanType * - CASE WHEN cond THEN trueVal ELSE null END => AND(cond, trueVal) * - CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal) * - CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal) - * - CASE WHEN cond THEN false END => AND(NOT(cond), false) + * - CASE WHEN cond THEN false END => false * - CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal) - * - CASE WHEN cond THEN true END => OR(cond, false) + * - CASE WHEN cond THEN true END => cond */ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { @@ -64,14 +64,18 @@ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { And(cond, trueValue) case CaseWhen(Seq((cond, trueValue)), Some(TrueLiteral)) => Or(Not(cond), trueValue) - case CaseWhen(Seq((cond, FalseLiteral)), elseValue) => 
- And(Not(cond), elseValue.getOrElse(FalseLiteral)) - case CaseWhen(Seq((cond, TrueLiteral)), elseValue) => - Or(cond, elseValue.getOrElse(FalseLiteral)) + case CaseWhen(Seq((_, FalseLiteral)), Some(FalseLiteral) | None) => + FalseLiteral + case CaseWhen(Seq((cond, FalseLiteral)), Some(elseValue)) => + And(Not(cond), elseValue) + case CaseWhen(Seq((cond, TrueLiteral)), Some(FalseLiteral) | None) => + cond + case CaseWhen(Seq((cond, TrueLiteral)), Some(elseValue)) => + Or(cond, elseValue) case e if e.dataType == BooleanType => e case e => assert(e.dataType != BooleanType, - "Expected a Boolean type expression in simplifyConditional, " + + "Expected a Boolean type expression in SimplifyConditionalsInPredicate, " + s"but got the type `${e.dataType.catalogString}` in `${e.sql}`.") e } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala index 01b6d1b990ca..1f3c24bdbb66 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala @@ -44,7 +44,7 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { LocalRelation('i.int, 'b.boolean, 'a.array(IntegerType), 'm.map(IntegerType, IntegerType)) private val anotherTestRelation = LocalRelation('d.int) - test("if(cond, trueVal, false) => And(cond, trueVal)") { + test("IF(cond, trueVal, false) => AND(cond, trueVal)") { val originalCond = If( UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"), @@ -59,7 +59,7 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { testProjection(originalCond, expectedExpr = originalCond) } - test("if(cond, trueVal, true) => or(not(cond), trueVal)") { + test("IF(cond, trueVal, true) => OR(NOT(cond), trueVal)") 
{ val originalCond = If( UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"), @@ -74,7 +74,7 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { testProjection(originalCond, expectedExpr = originalCond) } - test("if(cond, false, falseVal) => and(not(cond), falseVal)") { + test("IF(cond, false, falseVal) => AND(NOT(cond), elseVal)") { val originalCond = If( UnresolvedAttribute("i") > Literal(10), FalseLiteral, @@ -89,7 +89,7 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { testProjection(originalCond, expectedExpr = originalCond) } - test("if(cond, true, falseVal) => or(cond, falseVal)") { + test("IF(cond, true, falseVal) => OR(cond, elseVal)") { val originalCond = If( UnresolvedAttribute("i") > Literal(10), TrueLiteral, @@ -104,8 +104,7 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { testProjection(originalCond, expectedExpr = originalCond) } - - test("case when cond then trueVal else false end => And(cond, trueVal)") { + test("CASE WHEN cond THEN trueVal ELSE false END => AND(cond, trueVal)") { Seq(Some(FalseLiteral), None, Some(Literal(null, BooleanType))).foreach { elseExp => val originalCond = CaseWhen( Seq((UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"))), @@ -121,7 +120,7 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { } } - test("case when cond then trueVal else true end => or(not(cond), trueVal)") { + test("CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal)") { val originalCond = CaseWhen( Seq((UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"))), TrueLiteral) @@ -135,8 +134,7 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { testProjection(originalCond, expectedExpr = originalCond) } - test("case when cond then false else elseValue end => and(not(cond), elseValue)") { - Seq() + test("CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal)") { val originalCond = CaseWhen( Seq((UnresolvedAttribute("i") > 
Literal(10), FalseLiteral)), UnresolvedAttribute("b")) @@ -150,7 +148,17 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { testProjection(originalCond, expectedExpr = originalCond) } - test("case when cond then true else elseValue end => or(cond, elseValue)") { + test("CASE WHEN cond THEN false END => false") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), FalseLiteral))) + testFilter(originalCond, expectedCond = FalseLiteral) + testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal)") { val originalCond = CaseWhen( Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral)), UnresolvedAttribute("b")) @@ -164,7 +172,7 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { testProjection(originalCond, expectedExpr = originalCond) } - test("case when cond then true end => or(cond, null)") { + test("CASE WHEN cond THEN true END => cond") { val originalCond = CaseWhen( Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral))) val expectedCond = UnresolvedAttribute("i") > Literal(10) @@ -189,7 +197,7 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { testProjection(originalCond, expectedExpr = originalCond) } - test("Not expected type - simplifyConditional") { + test("Not expected type - SimplifyConditionalsInPredicate") { val e = intercept[AnalysisException] { testFilter(originalCond = Literal(null, IntegerType), expectedCond = FalseLiteral) }.getMessage From 878beb0be0ee1f76186a59ec9db900445a52ddfa Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 24 Dec 2020 13:42:35 +0800 Subject: [PATCH 6/6] fix --- .../catalyst/optimizer/SimplifyConditionalsInPredicate.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala index 59a0abf7b7d9..1ea85085bccd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.BooleanType /** - * A rule that converting conditional expressions to predicate expressions, if possible, in the + * A rule that converts conditional expressions to predicate expressions, if possible, in the * search condition of the WHERE/HAVING/ON(JOIN) clauses, which contain an implicit Boolean operator * "(search condition) = TRUE". After this converting, we can potentially push the filter down to * the data source.