[SPARK-13306] [SQL] uncorrelated scalar subquery #11190
Changes to CatalystQlSuite (the plan-comparison assertions in the subquery test are replaced by plain parse calls):

@@ -204,47 +204,8 @@ class CatalystQlSuite extends PlanTest {
   }

   test("subquery") {
-    comparePlans(
-      parser.parsePlan("select (select max(b) from s) ss from t"),
-      Project(
-        UnresolvedAlias(
-          Alias(
-            ScalarSubquery(
-              Project(
-                UnresolvedAlias(
-                  UnresolvedFunction("max", UnresolvedAttribute("b") :: Nil, false)) :: Nil,
-                UnresolvedRelation(TableIdentifier("s")))),
-            "ss")(ExprId(0))) :: Nil,
-        UnresolvedRelation(TableIdentifier("t"))))
-    comparePlans(
-      parser.parsePlan("select * from t where a = (select b from s)"),
-      Project(
-        UnresolvedAlias(
-          UnresolvedStar(None)) :: Nil,
-        Filter(
-          EqualTo(
-            UnresolvedAttribute("a"),
-            ScalarSubquery(
-              Project(
-                UnresolvedAlias(
-                  UnresolvedAttribute("b")) :: Nil,
-                UnresolvedRelation(TableIdentifier("s"))))),
-          UnresolvedRelation(TableIdentifier("t")))))
-    comparePlans(
-      parser.parsePlan("select * from t group by g having a > (select b from s)"),
-      Filter(
-        Cast(
-          GreaterThan(
-            UnresolvedAttribute("a"),
-            ScalarSubquery(
-              Project(
-                UnresolvedAlias(
-                  UnresolvedAttribute("b")) :: Nil,
-                UnresolvedRelation(TableIdentifier("s"))))),
-          BooleanType),
-        Aggregate(
-          UnresolvedAttribute("g") :: Nil,
-          UnresolvedAlias(UnresolvedStar(None)) :: Nil,
-          UnresolvedRelation(TableIdentifier("t")))))
+    parser.parsePlan("select (select max(b) from s) ss from t")
Inline review comments on this line:

Contributor: The only thing we are testing here is that things don't go really, really wrong. I'd prefer it if we tested the plan as well.

Author: Since plan checking is too easy to break, I added a test for the plan but removed it in the end.

Contributor: Ok, that makes sense.
The rest of the new test body:

+    parser.parsePlan("select * from t where a = (select b from s)")
+    parser.parsePlan("select * from t group by g having a > (select b from s)")
   }
 }
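The thread above weighs full plan comparison (brittle) against bare parse calls (which only check that parsing does not blow up). A possible middle ground, sketched here for illustration only and not part of this PR, assumes the same test scaffolding (PlanTest, parser, and the Catalyst ScalarSubquery expression used in the removed assertions) and checks just that a ScalarSubquery node shows up in the parsed tree:

```scala
// Hypothetical test, not part of this PR: verify that the parser produces a
// ScalarSubquery expression for a scalar subquery in the select list,
// without pinning down the exact shape of the unresolved plan.
test("subquery parses to a ScalarSubquery expression") {
  val plan = parser.parsePlan("select (select max(b) from s) ss from t")
  val found = plan.expressions.exists { expr =>
    expr.collect { case s: ScalarSubquery => s }.nonEmpty
  }
  assert(found, "expected a ScalarSubquery in the parsed plan")
}
```

Such a check stays robust to incidental changes in aliasing or projection structure while still catching a parser that silently drops the subquery.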
Changes to SparkPlan (the physical subquery expression is referenced by its new name, and the result handling now asserts that exactly one column comes back):

@@ -127,8 +127,8 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializable {
     doPrepare()

     // collect all the subqueries and submit jobs to execute them in background
-    val queryResults = ArrayBuffer[(SparkScalarSubquery, Future[Array[InternalRow]])]()
-    val allSubqueries = expressions.flatMap(_.collect {case e: SparkScalarSubquery => e})
+    val queryResults = ArrayBuffer[(ScalarSubquery, Future[Array[InternalRow]])]()
+    val allSubqueries = expressions.flatMap(_.collect {case e: ScalarSubquery => e})
     allSubqueries.foreach { e =>
       val futureResult = Future {
         e.plan.executeCollect()
@@ -146,9 +146,12 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializable {
         sys.error(s"Scalar subquery should return at most one row, but got ${rows.length}: " +
           s"${e.query.treeString}")
       }
-      // Analyzer will make sure that it only return on column
       if (rows.length > 0) {
+        assert(rows(0).numFields == 1, "Analyzer should make sure this only returns one column")
         e.updateResult(rows(0).get(0, e.dataType))
-      }
+      } else {
+        // the result should be null, since the expression already have null as default value,
+        // we don't need to update that.
+      }
     }
   }
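To make the control flow in the SparkPlan hunks easier to follow, here is a self-contained sketch of the same pattern: collect the scalar subqueries, start each one on a Future, then block for the results and enforce the one-row/one-column contract. Everything below (Subquery, Row, the function values) is a stand-in for illustration, not Spark's API:

```scala
import scala.collection.mutable.ArrayBuffer
import scala.concurrent.{Await, Future}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration

object SubqueryFuturesSketch {
  // Stand-ins for Spark's InternalRow and for the physical scalar subquery expression.
  type Row = Array[Any]
  final case class Subquery(name: String, run: () => Array[Row]) {
    // Like the physical ScalarSubquery, the default result stays null until updated.
    var result: Any = null
    def updateResult(v: Any): Unit = result = v
  }

  def prepareAndWait(subqueries: Seq[Subquery]): Unit = {
    // 1. Submit every subquery to run in the background.
    val queryResults = ArrayBuffer[(Subquery, Future[Array[Row]])]()
    subqueries.foreach { s =>
      queryResults += s -> Future(s.run())
    }
    // 2. Wait for each result and fold the single scalar value back into the expression.
    queryResults.foreach { case (s, futureRows) =>
      val rows = Await.result(futureRows, Duration.Inf)
      if (rows.length > 1) {
        sys.error(s"Scalar subquery ${s.name} should return at most one row, but got ${rows.length}")
      }
      if (rows.length > 0) {
        assert(rows(0).length == 1, "a scalar subquery must return exactly one column")
        s.updateResult(rows(0)(0))
      }
      // else: keep the default null, mirroring the "else" branch in the diff above.
    }
  }

  def main(args: Array[String]): Unit = {
    val maxB = Subquery("max_b", () => Array(Array[Any](42)))
    prepareAndWait(Seq(maxB))
    println(maxB.result) // 42
  }
}
```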
Changes to the physical subquery support in org.apache.spark.sql.execution (SparkScalarSubquery is renamed to ScalarSubquery, and the logical class is now referred to through the imported catalyst.expressions package):

@@ -18,8 +18,8 @@
 package org.apache.spark.sql.execution

 import org.apache.spark.sql.SQLContext
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{ExprId, ScalarSubquery, SubqueryExpression}
+import org.apache.spark.sql.catalyst.{expressions, InternalRow}
+import org.apache.spark.sql.catalyst.expressions.{ExprId, SubqueryExpression}
 import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReturnAnswer}
 import org.apache.spark.sql.catalyst.rules.Rule
@@ -30,7 +30,7 @@ import org.apache.spark.sql.types.DataType
  *
  * This is the physical copy of ScalarSubquery to be used inside SparkPlan.
  */
-case class SparkScalarSubquery(
+case class ScalarSubquery(
     @transient executedPlan: SparkPlan,
Inline comments on the executedPlan parameter:

Contributor: I'd just call this physicalPlan, or plan.

Contributor: Actually, plan is probably the best.
The parameter list continues:

     exprId: ExprId)
Inline comments on exprId:

Contributor: Document why we need exprId (for the explain string output?).

Author: Yes.

Contributor: Please document it.
   extends SubqueryExpression with CodegenFallback {

@@ -58,14 +58,13 @@ case class SparkScalarSubquery(
 /**
  * Convert the subquery from logical plan into executed plan.
  */
-private[sql] case class ConvertSubquery(sqlContext: SQLContext) extends Rule[SparkPlan] {
+private[sql] case class PlanSubqueries(sqlContext: SQLContext) extends Rule[SparkPlan] {
   def apply(plan: SparkPlan): SparkPlan = {
     plan.transformAllExpressions {
       // Only scalar subquery will be executed separately, all others will be written as join.
-      case subquery: ScalarSubquery =>
+      case subquery: expressions.ScalarSubquery =>
         val sparkPlan = sqlContext.planner.plan(ReturnAnswer(subquery.query)).next()
         val executedPlan = sqlContext.prepareForExecution.execute(sparkPlan)
-        SparkScalarSubquery(executedPlan, subquery.exprId)
+        ScalarSubquery(executedPlan, subquery.exprId)
     }
   }
 }
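After the rename, two classes share the simple name ScalarSubquery (the logical one in catalyst.expressions and the physical one in this file), which is why the rule matches on expressions.ScalarSubquery through the package import. A self-contained sketch of that pattern, with purely illustrative names rather than Spark's, is below:

```scala
// Illustrative only: mimics how PlanSubqueries swaps a "logical" marker
// expression for a "physical" one carrying an already-planned query,
// while both classes share the simple name ScalarSubquery.
object PlanSubqueriesSketch {
  object logicalpkg {
    case class ScalarSubquery(query: String, exprId: Int)
  }

  object physicalpkg {
    // Mirrors `import org.apache.spark.sql.catalyst.{expressions, ...}`:
    // the logical class is reached through a package-like prefix.
    import PlanSubqueriesSketch.{logicalpkg => expressions}

    case class ScalarSubquery(executedPlan: String, exprId: Int)

    // The rewrite rule: every logical ScalarSubquery becomes a physical one
    // bound to a planned, executable form of its query.
    def planSubqueries(exprs: Seq[Any]): Seq[Any] = exprs.map {
      case s: expressions.ScalarSubquery =>
        ScalarSubquery(executedPlan = s"planned(${s.query})", exprId = s.exprId)
      case other => other
    }
  }

  def main(args: Array[String]): Unit = {
    val in = Seq(logicalpkg.ScalarSubquery("select max(b) from s", 7), "someOtherExpr")
    println(physicalpkg.planSubqueries(in))
    // List(ScalarSubquery(planned(select max(b) from s),7), someOtherExpr)
  }
}
```

The type ascription in the match is what keeps the two same-named classes apart, just as in the rule above.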
A further review thread:

Contributor: Why is this needed?

Author: This is a helper used in the Analyzer and Optimizer; otherwise we would need to do type conversion.

Author: This is the base class for both the logical plan and the physical plan, which is kind of weird. It is there to make generateTreeString work in QueryPlan.

Contributor: The Analyzer and Optimizer only apply to logical plans, right?

Author: Yes.
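The exchange above is about keeping one subquery-expression base class whose nested query can be either a logical or a physical plan, so that generic tree printing (generateTreeString in QueryPlan) can recurse into it without caring which side it is on. A minimal, hypothetical sketch of that shape, using toy classes rather than Spark's, looks like this:

```scala
object SubqueryBaseSketch {
  // A toy stand-in for Spark's TreeNode/QueryPlan with a recursive printer.
  trait Plan {
    def nodeName: String
    def children: Seq[Plan]
    def treeString(indent: Int = 0): String =
      (" " * indent) + nodeName + "\n" + children.map(_.treeString(indent + 2)).mkString
  }
  case class LogicalScan(table: String) extends Plan {
    def nodeName: String = s"LogicalScan($table)"
    def children: Seq[Plan] = Nil
  }
  case class PhysicalScan(table: String) extends Plan {
    def nodeName: String = s"PhysicalScan($table)"
    def children: Seq[Plan] = Nil
  }

  // The shared base: both subquery flavors expose their nested query as a Plan,
  // so one generic printer handles logical and physical subqueries alike.
  trait SubqueryExpression { def plan: Plan }
  case class LogicalScalarSubquery(plan: Plan) extends SubqueryExpression
  case class PhysicalScalarSubquery(plan: Plan) extends SubqueryExpression

  def explainSubquery(s: SubqueryExpression): String = s.plan.treeString()

  def main(args: Array[String]): Unit = {
    print(explainSubquery(LogicalScalarSubquery(LogicalScan("s"))))
    print(explainSubquery(PhysicalScalarSubquery(PhysicalScan("s"))))
  }
}
```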