-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-13495][SQL] Add Null Filters in the query plan for Filters/Joins based on their data constraints #11372
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
cc4323f
28050b3
0b1520c
2a469e8
80dab7e
013f97a
31b1700
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -76,6 +76,7 @@ abstract class Optimizer extends RuleExecutor[LogicalPlan] { | |
| CombineLimits, | ||
| CombineUnions, | ||
| // Constant folding and strength reduction | ||
| NullFiltering, | ||
| NullPropagation, | ||
| OptimizeIn, | ||
| ConstantFolding, | ||
|
|
@@ -585,6 +586,52 @@ object NullPropagation extends Rule[LogicalPlan] { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Attempts to eliminate reading (unnecessary) NULL values if they are not required for correctness | ||
| * by inserting isNotNull filters is the query plan. These filters are currently inserted beneath | ||
| * existing Filters and Join operators and are inferred based on their data constraints. | ||
| * | ||
| * Note: While this optimization is applicable to all types of join, it primarily benefits Inner and | ||
| * LeftSemi joins. | ||
| */ | ||
| object NullFiltering extends Rule[LogicalPlan] with PredicateHelper { | ||
| def apply(plan: LogicalPlan): LogicalPlan = plan transform { | ||
| case filter @ Filter(condition, child: LogicalPlan) => | ||
| // We generate a list of additional isNotNull filters from the operator's existing constraints | ||
| // but remove those that are either already part of the filter condition or are part of the | ||
| // operator's child constraints. | ||
| val newIsNotNullConstraints = filter.constraints.filter(_.isInstanceOf[IsNotNull]) -- | ||
| (child.constraints ++ splitConjunctivePredicates(condition)) | ||
| val newCondition = if (newIsNotNullConstraints.nonEmpty) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove newConditino and just return filter if this doesn't do anything so we can reuse that filter subplan |
||
| And(newIsNotNullConstraints.reduce(And), condition) | ||
| } else { | ||
| condition | ||
| } | ||
| Filter(newCondition, child) | ||
|
|
||
| case join @ Join(left: LogicalPlan, right: LogicalPlan, joinType: JoinType, | ||
| condition: Option[Expression]) => | ||
| val leftIsNotNullConstraints = join.constraints | ||
|
||
| .filter(_.isInstanceOf[IsNotNull]) | ||
| .filter(_.references.subsetOf(left.outputSet)) -- left.constraints | ||
| val rightIsNotNullConstraints = | ||
| join.constraints | ||
| .filter(_.isInstanceOf[IsNotNull]) | ||
| .filter(_.references.subsetOf(right.outputSet)) -- right.constraints | ||
| val newLeftChild = if (leftIsNotNullConstraints.nonEmpty) { | ||
| Filter(leftIsNotNullConstraints.reduce(And), left) | ||
| } else { | ||
| left | ||
| } | ||
| val newRightChild = if (rightIsNotNullConstraints.nonEmpty) { | ||
| Filter(rightIsNotNullConstraints.reduce(And), right) | ||
| } else { | ||
| right | ||
| } | ||
| Join(newLeftChild, newRightChild, joinType, condition) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here, would be nice to reuse |
||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Replaces [[Expression Expressions]] that can be statically evaluated with | ||
| * equivalent [[Literal]] values. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"in the query plan"