Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,22 @@ trait ConstraintHelper {
*/
def inferAdditionalConstraints(constraints: Set[Expression]): Set[Expression] = {
var inferredConstraints = Set.empty[Expression]
constraints.foreach {
val binaryComparisons = constraints.filter(_.isInstanceOf[BinaryComparison])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you know what constraints are not BinaryComparison? I think it's possible, but I can't find some examples.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry. Only BinaryComparison is incorrect. for example: int_column = long_column where long_column in (1L, 2L).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The issue fix by 420058b.

binaryComparisons.foreach {
case eq @ EqualTo(l: Attribute, r: Attribute) =>
val candidateConstraints = constraints - eq
val candidateConstraints = binaryComparisons - eq
inferredConstraints ++= replaceConstraints(candidateConstraints, l, r)
inferredConstraints ++= replaceConstraints(candidateConstraints, r, l)
case eq @ EqualTo(l @ Cast(lc: Attribute, _, tz), r: Attribute) =>
val candidateConstraints = binaryComparisons - eq
val bridge = Cast(r, lc.dataType, tz)
inferredConstraints ++= replaceConstraints(candidateConstraints, r, l)
inferredConstraints ++= replaceConstraints(candidateConstraints, lc, bridge)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this safe? cast is not reversible. cast(int_col as long) = long_col can have different result than int_col = cast(long_col as int).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2060190: Makes it only support filter at higher data type.

case eq @ EqualTo(l: Attribute, r @ Cast(rc: Attribute, _, tz)) =>
val candidateConstraints = binaryComparisons - eq
val bridge = Cast(l, rc.dataType, tz)
inferredConstraints ++= replaceConstraints(candidateConstraints, l, r)
inferredConstraints ++= replaceConstraints(candidateConstraints, rc, bridge)
case _ => // No inference
}
inferredConstraints -- constraints
Expand All @@ -75,8 +86,10 @@ trait ConstraintHelper {
private def replaceConstraints(
constraints: Set[Expression],
source: Expression,
destination: Attribute): Set[Expression] = constraints.map(_ transform {
destination: Expression): Set[Expression] = constraints.map(_ transform {
case e: Expression if e.semanticEquals(source) => destination
}).map(_ transform {
case Cast(e @ Cast(child, _, _), dt, _) if e == destination && dt == child.dataType => child
})

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{IntegerType, LongType}

class InferFiltersFromConstraintsSuite extends PlanTest {

Expand All @@ -46,8 +47,8 @@ class InferFiltersFromConstraintsSuite extends PlanTest {
y: LogicalPlan,
expectedLeft: LogicalPlan,
expectedRight: LogicalPlan,
joinType: JoinType) = {
val condition = Some("x.a".attr === "y.a".attr)
joinType: JoinType,
condition: Option[Expression] = Some("x.a".attr === "y.a".attr)) = {
val originalQuery = x.join(y, joinType, condition).analyze
val correctAnswer = expectedLeft.join(expectedRight, joinType, condition).analyze
val optimized = Optimize.execute(originalQuery)
Expand Down Expand Up @@ -263,4 +264,34 @@ class InferFiltersFromConstraintsSuite extends PlanTest {
val y = testRelation.subquery('y)
testConstraintsAfterJoin(x, y, x.where(IsNotNull('a)), y, RightOuter)
}

test("Constraints should be inferred from cast equality constraint(filter at lower data type)") {
val testRelation1 = LocalRelation('a.int)
val testRelation2 = LocalRelation('b.long)
val originalLeft = testRelation1.where('a === 1).subquery('left)
val originalRight = testRelation2.subquery('right)

val left = testRelation1.where(IsNotNull('a) && 'a === 1).subquery('left)
val right = testRelation2.where(IsNotNull('b) && 'b.cast(IntegerType) === 1).subquery('right)

Seq(Some("left.a".attr.cast(LongType) === "right.b".attr),
Some("right.b".attr === "left.a".attr.cast(LongType))).foreach { condition =>
testConstraintsAfterJoin(originalLeft, originalRight, left, right, Inner, condition)
Copy link
Member

@maropu maropu Feb 12, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In terms of test coverage, its better to test both cases (left/right-side casts)? I have the same comment in the test below.

}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's also test cast(int) here. The key is: we should test both left side cast and right side cast, as @maropu said.

"left.a".attr.cast(LongType) === "right.b".attr and "right.b".attr === "left.a".attr.cast(LongType) only test left side cast (join left side, not EqualTo left side).

}

test("Constraints should be inferred from cast equality constraint(filter at higher data type)") {
val testRelation1 = LocalRelation('a.int)
val testRelation2 = LocalRelation('b.long)
val originalLeft = testRelation1.subquery('left)
val originalRight = testRelation2.where('b === 1L).subquery('right)

val left = testRelation1.where(IsNotNull('a) && 'a.cast(LongType) === 1L).subquery('left)
val right = testRelation2.where(IsNotNull('b) && 'b === 1L).subquery('right)

Seq(Some("left.a".attr.cast(LongType) === "right.b".attr),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might be wrong, but I find these test cases a bit confusing because left and right have equality filters. Eg. here 'a === 1 in left so actually it would be correct to infer 1.cast(LongType) === 'b for right. This PR doesn't do that obviously (#27518 will address that) but probably using inequalities ('a < 1) would be easier to follow.

Some("right.b".attr === "left.a".attr.cast(LongType))).foreach { condition =>
testConstraintsAfterJoin(originalLeft, originalRight, left, right, Inner, condition)
}
}
}