-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-29231][SQL] Constraints should be inferred from cast equality constraint #27252
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
048a0ec
34c7001
7dcfe91
2060190
420058b
8dcd076
47eadf4
d283028
d055eba
4b14b3f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,11 +62,17 @@ trait ConstraintHelper { | |
| */ | ||
| def inferAdditionalConstraints(constraints: Set[Expression]): Set[Expression] = { | ||
| var inferredConstraints = Set.empty[Expression] | ||
| // IsNotNull should be constructed by `constructIsNotNullConstraints`. | ||
| val predicates = constraints.filterNot(_.isInstanceOf[IsNotNull]) | ||
| constraints.foreach { | ||
| case eq @ EqualTo(l: Attribute, r: Attribute) => | ||
| val candidateConstraints = constraints - eq | ||
| val candidateConstraints = predicates - eq | ||
| inferredConstraints ++= replaceConstraints(candidateConstraints, l, r) | ||
| inferredConstraints ++= replaceConstraints(candidateConstraints, r, l) | ||
| case eq @ EqualTo(l @ Cast(_: Attribute, _, _), r: Attribute) => | ||
| inferredConstraints ++= replaceConstraints(predicates - eq, r, l) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. according to https://github.com/apache/spark/pull/27252/files#r378111623 If we have But I'm a bit unsure about how to do it. We may need a variant of
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we need to touch
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think both this PR and #27518 are beneficial. But I would use
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cloud-fan This PR support I removed it because:
How about only supporting @peter-toth I'd like to support these cases in #27518:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @wangyum I see, but I think you are currently doing something very different in #27518; see details here: #27518 (comment). I would suggest keeping your #27518 in its current form (but amending its title) and opening a new one to address inequalities.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
+1 for supporting only the limited case in this PR. Since this part of the optimization can affect many queries, I think we need exhaustive discussions and tests before supporting wider cases. |
||
| case eq @ EqualTo(l: Attribute, r @ Cast(_: Attribute, _, _)) => | ||
| inferredConstraints ++= replaceConstraints(predicates - eq, l, r) | ||
| case _ => // No inference | ||
| } | ||
| inferredConstraints -- constraints | ||
|
|
@@ -75,7 +81,7 @@ trait ConstraintHelper { | |
| private def replaceConstraints( | ||
| constraints: Set[Expression], | ||
| source: Expression, | ||
| destination: Attribute): Set[Expression] = constraints.map(_ transform { | ||
| destination: Expression): Set[Expression] = constraints.map(_ transform { | ||
| case e: Expression if e.semanticEquals(source) => destination | ||
| }) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.plans._ | |
| import org.apache.spark.sql.catalyst.plans.logical._ | ||
| import org.apache.spark.sql.catalyst.rules._ | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.types.{IntegerType, LongType} | ||
|
|
||
| class InferFiltersFromConstraintsSuite extends PlanTest { | ||
|
|
||
|
|
@@ -46,8 +47,8 @@ class InferFiltersFromConstraintsSuite extends PlanTest { | |
| y: LogicalPlan, | ||
| expectedLeft: LogicalPlan, | ||
| expectedRight: LogicalPlan, | ||
| joinType: JoinType) = { | ||
| val condition = Some("x.a".attr === "y.a".attr) | ||
| joinType: JoinType, | ||
| condition: Option[Expression] = Some("x.a".attr === "y.a".attr)) = { | ||
| val originalQuery = x.join(y, joinType, condition).analyze | ||
| val correctAnswer = expectedLeft.join(expectedRight, joinType, condition).analyze | ||
| val optimized = Optimize.execute(originalQuery) | ||
|
|
@@ -263,4 +264,36 @@ class InferFiltersFromConstraintsSuite extends PlanTest { | |
| val y = testRelation.subquery('y) | ||
| testConstraintsAfterJoin(x, y, x.where(IsNotNull('a)), y, RightOuter) | ||
| } | ||
|
|
||
| test("Constraints should be inferred from cast equality constraint(filter higher data type)") { | ||
| val testRelation1 = LocalRelation('a.int) | ||
| val testRelation2 = LocalRelation('b.long) | ||
| val originalLeft = testRelation1.subquery('left) | ||
| val originalRight = testRelation2.where('b === 1L).subquery('right) | ||
|
|
||
| val left = testRelation1.where(IsNotNull('a) && 'a.cast(LongType) === 1L).subquery('left) | ||
| val right = testRelation2.where(IsNotNull('b) && 'b === 1L).subquery('right) | ||
|
|
||
| Seq(Some("left.a".attr.cast(LongType) === "right.b".attr), | ||
| Some("right.b".attr === "left.a".attr.cast(LongType))).foreach { condition => | ||
| testConstraintsAfterJoin(originalLeft, originalRight, left, right, Inner, condition) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In terms of test coverage, isn't it better to test both cases (left/right-side casts)? I have the same comment on the test below. |
||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's also test
|
||
| } | ||
|
|
||
| test("Constraints shouldn't be inferred from cast equality constraint(filter lower data type)") { | ||
| val testRelation1 = LocalRelation('a.int) | ||
| val testRelation2 = LocalRelation('b.long) | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| val originalLeft = testRelation1.where('a > 1).subquery('left) | ||
| val originalRight = testRelation2.where('b < 10).subquery('right) | ||
|
|
||
| val left = testRelation1.where( | ||
| IsNotNull('a) && 'a > 1 && 'a.cast(LongType) < Literal(10).cast(LongType)).subquery('left) | ||
| val right = testRelation2.where( | ||
| IsNotNull('b) && 'b < Literal(10).cast(LongType)).subquery('right) | ||
|
|
||
| Seq(Some("left.a".attr.cast(LongType) === "right.b".attr), | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I might be wrong, but I find these test cases a bit confusing because |
||
| Some("right.b".attr === "left.a".attr.cast(LongType))).foreach { condition => | ||
| testConstraintsAfterJoin(originalLeft, originalRight, left, right, Inner, condition) | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can't we do it like
`predicates.foreach {` here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. We can do it.