-
Notifications
You must be signed in to change notification settings - Fork 29.1k
[SPARK-13871][SQL] Support for inferring filters from data constraints #11665
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -79,7 +79,7 @@ abstract class Optimizer extends RuleExecutor[LogicalPlan] { | |
| CombineLimits, | ||
| CombineUnions, | ||
| // Constant folding and strength reduction | ||
| NullFiltering, | ||
| InferFiltersFromConstraints, | ||
| NullPropagation, | ||
| OptimizeIn, | ||
| ConstantFolding, | ||
|
|
@@ -607,50 +607,44 @@ object NullPropagation extends Rule[LogicalPlan] { | |
| } | ||
|
|
||
| /** | ||
| * Attempts to eliminate reading (unnecessary) NULL values if they are not required for correctness | ||
| * by inserting isNotNull filters in the query plan. These filters are currently inserted beneath | ||
| * existing Filters and Join operators and are inferred based on their data constraints. | ||
| * Eliminate reading unnecessary values if they are not required for correctness (and can help in | ||
| * optimizing the query) by inserting relevant filters in the query plan based on an operator's | ||
| * data constraints. These filters are currently added to the existing conditions in the Filter | ||
| * operators and on either side of Join operators. | ||
| * | ||
| * Note: While this optimization is applicable to all types of join, it primarily benefits Inner and | ||
| * LeftSemi joins. | ||
| */ | ||
| object NullFiltering extends Rule[LogicalPlan] with PredicateHelper { | ||
| object InferFiltersFromConstraints extends Rule[LogicalPlan] with PredicateHelper { | ||
| // We generate a list of additional filters from the operator's existing constraint but remove | ||
|
||
| // those that are either already part of the operator's condition or are part of the operator's | ||
| // child constraints. | ||
| def apply(plan: LogicalPlan): LogicalPlan = plan transform { | ||
| case filter @ Filter(condition, child) => | ||
| // We generate a list of additional isNotNull filters from the operator's existing constraints | ||
| // but remove those that are either already part of the filter condition or are part of the | ||
| // operator's child constraints. | ||
| val newIsNotNullConstraints = filter.constraints.filter(_.isInstanceOf[IsNotNull]) -- | ||
| // For the Filter operator, we try to generate additional filters by only inferring the | ||
| // IsNotNull constraints. These IsNotNull filters are then used while generating the | ||
| // physical plan to quickly short circuit the null checks in the generated code. | ||
| val newFilters = filter.constraints.filter(_.isInstanceOf[IsNotNull]) -- | ||
|
||
| (child.constraints ++ splitConjunctivePredicates(condition)) | ||
| if (newIsNotNullConstraints.nonEmpty) { | ||
| Filter(And(newIsNotNullConstraints.reduce(And), condition), child) | ||
| if (newFilters.nonEmpty) { | ||
| Filter(And(newFilters.reduce(And), condition), child) | ||
| } else { | ||
| filter | ||
| } | ||
|
|
||
| case join @ Join(left, right, joinType, condition) => | ||
| val leftIsNotNullConstraints = join.constraints | ||
| .filter(_.isInstanceOf[IsNotNull]) | ||
| .filter(_.references.subsetOf(left.outputSet)) -- left.constraints | ||
| val rightIsNotNullConstraints = | ||
| join.constraints | ||
| .filter(_.isInstanceOf[IsNotNull]) | ||
| .filter(_.references.subsetOf(right.outputSet)) -- right.constraints | ||
| val newLeftChild = if (leftIsNotNullConstraints.nonEmpty) { | ||
| Filter(leftIsNotNullConstraints.reduce(And), left) | ||
| } else { | ||
| left | ||
| } | ||
| val newRightChild = if (rightIsNotNullConstraints.nonEmpty) { | ||
| Filter(rightIsNotNullConstraints.reduce(And), right) | ||
| } else { | ||
| right | ||
| } | ||
| if (newLeftChild != left || newRightChild != right) { | ||
| Join(newLeftChild, newRightChild, joinType, condition) | ||
| } else { | ||
| join | ||
| case join@Join(left, right, joinType, conditionOpt) => | ||
|
||
| val additionalConstraints = join.constraints.filter { c => | ||
| // Only consider constraints that can be pushed down to either the left or the right child | ||
| c.references.subsetOf(left.outputSet) || c.references.subsetOf(right.outputSet)} -- | ||
| (left.constraints ++ right.constraints) | ||
| val newConditionOpt = conditionOpt match { | ||
| case Some(condition) => | ||
| val newFilters = additionalConstraints -- splitConjunctivePredicates(condition) | ||
| if (newFilters.nonEmpty) Option(And(newFilters.reduce(And), condition)) else None | ||
|
||
| case None => | ||
| if (additionalConstraints.nonEmpty) Option(additionalConstraints.reduce(And)) else None | ||
|
||
| } | ||
| if (newConditionOpt.isDefined) Join(left, right, joinType, newConditionOpt) else join | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This rule doesn't seem related to the comment above it (the same goes for NullFiltering).