-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-4226][SQL] SparkSQL - Add support for subqueries in predicates('in' clause) #3249
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
0a41e91
0134915
152fd23
9e361df
86a4430
4ee8c18
834acda
f1b7d30
dc424df
4afc469
03db47b
a27cca6
7653eee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Conflicts: sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,6 +59,7 @@ class Analyzer(catalog: Catalog, | |
| ResolveGroupingAnalytics :: | ||
| ResolveSortReferences :: | ||
| ImplicitGenerate :: | ||
| SubQueryExpressions :: | ||
| ResolveFunctions :: | ||
| GlobalAggregates :: | ||
| UnresolvedHavingClauseAttributes :: | ||
|
|
@@ -422,6 +423,64 @@ class Analyzer(catalog: Catalog, | |
| Generate(g, join = false, outer = false, None, child) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Transforms the query which has subquery expressions in where clause to left semi join. | ||
| * select T1.x from T1 where T1.x in (select T2.y from T2) transformed to | ||
| * select T1.x from T1 left semi join T2 on T1.x = T2.y. | ||
| */ | ||
| object SubQueryExpressions extends Rule[LogicalPlan] { | ||
|
|
||
| def apply(plan: LogicalPlan): LogicalPlan = plan transform { | ||
| case filter @ Filter(conditions, child) => | ||
| val subqueryExprs = new scala.collection.mutable.ArrayBuffer[SubqueryExpression]() | ||
| val nonSubQueryConds = new scala.collection.mutable.ArrayBuffer[Expression]() | ||
| val transformedConds = conditions.transform{ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Space before `{`. Also, I would consider doing this in two steps to avoid depending on `transform` for side effects: a `collect` to get the list and then a `transform` to replace them with the dummy `Literal(true)`.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ok. Done in two steps. |
||
| // Replace with dummy | ||
| case s @ SubqueryExpression(exp,subquery) => | ||
| subqueryExprs += s | ||
| Literal(true) | ||
| } | ||
| if(subqueryExprs.size > 0) { | ||
| val subqueryExpr = subqueryExprs.remove(0) | ||
| val firstJoin = createLeftSemiJoin( | ||
| child, subqueryExpr.exp, subqueryExpr.child, transformedConds) | ||
| subqueryExprs.foldLeft(firstJoin){case(fj, sq) => | ||
| createLeftSemiJoin(fj, sq.exp, sq.child)} | ||
| } else { | ||
| filter | ||
| } | ||
| } | ||
|
|
||
| def createLeftSemiJoin(left: LogicalPlan, | ||
| expression: Expression, subquery: LogicalPlan, | ||
| parentConds: Expression = null) : LogicalPlan = { | ||
| val (transformedPlan, subqueryConds) = transformAndGetConditions( | ||
| expression, subquery) | ||
| // Unify the parent query conditions and subquery conditions and add these as join conditions | ||
| val unifyConds = if (parentConds != null) And(parentConds, subqueryConds) else subqueryConds | ||
| Join(left, transformedPlan, LeftSemi, Some(unifyConds)) | ||
| } | ||
|
|
||
| def transformAndGetConditions(expression: Expression, | ||
| plan: LogicalPlan): (LogicalPlan, Expression) = { | ||
| val expr = new scala.collection.mutable.ArrayBuffer[Expression]() | ||
| val transformedPlan = plan transform { | ||
| case project @ Project(projectList, f @ Filter(condition, child)) => | ||
| expr += EqualTo(expression, projectList(0).asInstanceOf[Expression]) | ||
| expr += condition | ||
| val resolvedChild = ResolveRelations(child) | ||
| // Add the expressions to the projections which are used as filters in subquery | ||
| val toBeAddedExprs = f.references.filter( | ||
| a=>resolvedChild.resolve(a.name, resolver) != None && !projectList.contains(a)) | ||
| Project(projectList ++ toBeAddedExprs, child) | ||
| case project @ Project(projectList, child) => | ||
| expr += EqualTo(expression, projectList(0).asInstanceOf[Expression]) | ||
| project | ||
| } | ||
| (transformedPlan, expr.reduce(And(_, _))) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We are not going to handle the non-`Subquery` case here, right? How about