-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-29231][SQL] Constraints should be inferred from cast equality constraint #27252
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
048a0ec
34c7001
7dcfe91
2060190
420058b
8dcd076
47eadf4
d283028
d055eba
4b14b3f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,11 +62,22 @@ trait ConstraintHelper { | |
| */ | ||
| def inferAdditionalConstraints(constraints: Set[Expression]): Set[Expression] = { | ||
| var inferredConstraints = Set.empty[Expression] | ||
| constraints.foreach { | ||
| val binaryComparisons = constraints.filter(_.isInstanceOf[BinaryComparison]) | ||
| binaryComparisons.foreach { | ||
| case eq @ EqualTo(l: Attribute, r: Attribute) => | ||
| val candidateConstraints = constraints - eq | ||
| val candidateConstraints = binaryComparisons - eq | ||
| inferredConstraints ++= replaceConstraints(candidateConstraints, l, r) | ||
| inferredConstraints ++= replaceConstraints(candidateConstraints, r, l) | ||
| case eq @ EqualTo(l @ Cast(lc: Attribute, _, tz), r: Attribute) => | ||
| val candidateConstraints = binaryComparisons - eq | ||
| val bridge = Cast(r, lc.dataType, tz) | ||
| inferredConstraints ++= replaceConstraints(candidateConstraints, r, l) | ||
| inferredConstraints ++= replaceConstraints(candidateConstraints, lc, bridge) | ||
|
||
| case eq @ EqualTo(l: Attribute, r @ Cast(rc: Attribute, _, tz)) => | ||
| val candidateConstraints = binaryComparisons - eq | ||
| val bridge = Cast(l, rc.dataType, tz) | ||
| inferredConstraints ++= replaceConstraints(candidateConstraints, l, r) | ||
| inferredConstraints ++= replaceConstraints(candidateConstraints, rc, bridge) | ||
| case _ => // No inference | ||
| } | ||
| inferredConstraints -- constraints | ||
|
|
@@ -75,8 +86,10 @@ trait ConstraintHelper { | |
| private def replaceConstraints( | ||
| constraints: Set[Expression], | ||
| source: Expression, | ||
| destination: Attribute): Set[Expression] = constraints.map(_ transform { | ||
| destination: Expression): Set[Expression] = constraints.map(_ transform { | ||
| case e: Expression if e.semanticEquals(source) => destination | ||
| }).map(_ transform { | ||
| case Cast(e @ Cast(child, _, _), dt, _) if e == destination && dt == child.dataType => child | ||
wangyum marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| }) | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.plans._ | |
| import org.apache.spark.sql.catalyst.plans.logical._ | ||
| import org.apache.spark.sql.catalyst.rules._ | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.types.{IntegerType, LongType} | ||
|
|
||
| class InferFiltersFromConstraintsSuite extends PlanTest { | ||
|
|
||
|
|
@@ -46,8 +47,8 @@ class InferFiltersFromConstraintsSuite extends PlanTest { | |
| y: LogicalPlan, | ||
| expectedLeft: LogicalPlan, | ||
| expectedRight: LogicalPlan, | ||
| joinType: JoinType) = { | ||
| val condition = Some("x.a".attr === "y.a".attr) | ||
| joinType: JoinType, | ||
| condition: Option[Expression] = Some("x.a".attr === "y.a".attr)) = { | ||
| val originalQuery = x.join(y, joinType, condition).analyze | ||
| val correctAnswer = expectedLeft.join(expectedRight, joinType, condition).analyze | ||
| val optimized = Optimize.execute(originalQuery) | ||
|
|
@@ -263,4 +264,34 @@ class InferFiltersFromConstraintsSuite extends PlanTest { | |
| val y = testRelation.subquery('y) | ||
| testConstraintsAfterJoin(x, y, x.where(IsNotNull('a)), y, RightOuter) | ||
| } | ||
|
|
||
| test("Constraints should be inferred from cast equality constraint(filter at lower data type)") { | ||
| val testRelation1 = LocalRelation('a.int) | ||
| val testRelation2 = LocalRelation('b.long) | ||
| val originalLeft = testRelation1.where('a === 1).subquery('left) | ||
| val originalRight = testRelation2.subquery('right) | ||
|
|
||
| val left = testRelation1.where(IsNotNull('a) && 'a === 1).subquery('left) | ||
| val right = testRelation2.where(IsNotNull('b) && 'b.cast(IntegerType) === 1).subquery('right) | ||
|
|
||
| Seq(Some("left.a".attr.cast(LongType) === "right.b".attr), | ||
| Some("right.b".attr === "left.a".attr.cast(LongType))).foreach { condition => | ||
| testConstraintsAfterJoin(originalLeft, originalRight, left, right, Inner, condition) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In terms of test coverage, wouldn't it be better to test both cases (left-side and right-side casts)? I have the same comment for the test below. |
||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's also test
|
||
| } | ||
|
|
||
| test("Constraints should be inferred from cast equality constraint(filter at higher data type)") { | ||
| val testRelation1 = LocalRelation('a.int) | ||
| val testRelation2 = LocalRelation('b.long) | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| val originalLeft = testRelation1.subquery('left) | ||
| val originalRight = testRelation2.where('b === 1L).subquery('right) | ||
|
|
||
| val left = testRelation1.where(IsNotNull('a) && 'a.cast(LongType) === 1L).subquery('left) | ||
| val right = testRelation2.where(IsNotNull('b) && 'b === 1L).subquery('right) | ||
|
|
||
| Seq(Some("left.a".attr.cast(LongType) === "right.b".attr), | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I might be wrong, but I find these test cases a bit confusing because |
||
| Some("right.b".attr === "left.a".attr.cast(LongType))).foreach { condition => | ||
| testConstraintsAfterJoin(originalLeft, originalRight, left, right, Inner, condition) | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you know which constraints are not
`BinaryComparison`? I think it's possible, but I can't find any examples. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry. Considering only
`BinaryComparison` is incorrect. For example: `int_column = long_column where long_column in (1L, 2L)`. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The issue is fixed by 420058b.