-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-20334][SQL] Return a better error message when correlated predicates contain aggregate expression that has mixture of outer and local references. #17636
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
a266c8d
bb1bdad
ff88651
c4e1a01
af3d367
55c64ca
d986ddc
2411f3e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1210,6 +1210,29 @@ class Analyzer( | |
| private def checkAndGetOuterReferences(sub: LogicalPlan): Seq[Expression] = { | ||
| val outerReferences = ArrayBuffer.empty[Expression] | ||
|
|
||
| // Validate that correlated aggregate expressions do not contain a mixture | ||
| // of outer and local references. | ||
| def checkMixedReferencesInsideAggregation(expr: Expression): Unit = { | ||
| expr.foreach { | ||
| case a: AggregateExpression if containsOuter(a) => | ||
| val outer = a.collect { case OuterReference(e) => e.toAttribute } | ||
| val local = a.references -- outer | ||
| if (local.nonEmpty) { | ||
| val msg = | ||
| s""" | ||
| |Found an aggregate expression in a correlated predicate that has both | ||
| |outer and local references, which is not supported yet. | ||
| |Aggregate expression: ${a.sql} | ||
| |Outer references: ${outer.map(_.sql).mkString(", ")} | ||
| |Local references: ${local.map(_.sql).mkString(", ")} | ||
|
||
| """. | ||
|
||
| stripMargin.replace("\n", " ").trim() | ||
| failAnalysis(msg) | ||
| } | ||
| case _ => | ||
| } | ||
| } | ||
|
|
||
| // Make sure a plan's subtree does not contain outer references | ||
| def failOnOuterReferenceInSubTree(p: LogicalPlan): Unit = { | ||
| if (hasOuterReferences(p)) { | ||
|
|
@@ -1219,6 +1242,7 @@ class Analyzer( | |
|
|
||
| // Make sure a plan's expressions do not contain outer references | ||
|
||
| def failOnOuterReference(p: LogicalPlan): Unit = { | ||
|
||
| p.expressions.foreach(checkMixedReferencesInsideAggregation) | ||
|
||
| if (p.expressions.exists(containsOuter)) { | ||
| failAnalysis( | ||
| "Expressions referencing the outer query are not supported outside of WHERE/HAVING " + | ||
|
|
@@ -1305,6 +1329,8 @@ class Analyzer( | |
| case _: EqualTo | _: EqualNullSafe => false | ||
| case _ => true | ||
| } | ||
|
|
||
| correlated.foreach(checkMixedReferencesInsideAggregation(_)) | ||
|
||
| // The aggregate expressions are treated in a special way by getOuterReferences. If the | ||
| // aggregate expression contains only outer reference attributes then the entire aggregate | ||
| // expression is isolated as an OuterReference. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -367,6 +367,8 @@ case class OuterReference(e: NamedExpression) | |
| override def exprId: ExprId = e.exprId | ||
| override def toAttribute: Attribute = e.toAttribute | ||
| override def newInstance(): NamedExpression = OuterReference(e.newInstance()) | ||
| override def sql: String = e.sql | ||
| override def toString: String = e.toString | ||
|
||
| } | ||
|
|
||
| object VirtualColumn { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,42 +1,72 @@ | ||
| -- The test file contains negative test cases | ||
| -- of invalid queries where error messages are expected. | ||
|
|
||
| create temporary view t1 as select * from values | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These changes are only for keyword case, right?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @viirya Yes. Since I was already editing this test file, I thought I should fix the keyword case as well. |
||
| CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES | ||
| (1, 2, 3) | ||
| as t1(t1a, t1b, t1c); | ||
| AS t1(t1a, t1b, t1c); | ||
|
|
||
| create temporary view t2 as select * from values | ||
| CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES | ||
| (1, 0, 1) | ||
| as t2(t2a, t2b, t2c); | ||
| AS t2(t2a, t2b, t2c); | ||
|
|
||
| create temporary view t3 as select * from values | ||
| CREATE TEMPORARY VIEW t3 AS SELECT * FROM VALUES | ||
| (3, 1, 2) | ||
| as t3(t3a, t3b, t3c); | ||
| AS t3(t3a, t3b, t3c); | ||
|
|
||
| -- TC 01.01 | ||
| -- The column t2b in the SELECT of the subquery is invalid | ||
| -- because it is neither an aggregate function nor a GROUP BY column. | ||
| select t1a, t2b | ||
| from t1, t2 | ||
| where t1b = t2c | ||
| and t2b = (select max(avg) | ||
| from (select t2b, avg(t2b) avg | ||
| from t2 | ||
| where t2a = t1.t1b | ||
| SELECT t1a, t2b | ||
| FROM t1, t2 | ||
| WHERE t1b = t2c | ||
| AND t2b = (SELECT max(avg) | ||
| FROM (SELECT t2b, avg(t2b) avg | ||
| FROM t2 | ||
| WHERE t2a = t1.t1b | ||
| ) | ||
| ) | ||
| ; | ||
|
|
||
| -- TC 01.02 | ||
| -- Invalid due to the column t2b not being part of the output from table t2. | ||
| select * | ||
| from t1 | ||
| where t1a in (select min(t2a) | ||
| from t2 | ||
| group by t2c | ||
| having t2c in (select max(t3c) | ||
| from t3 | ||
| group by t3b | ||
| having t3b > t2b )) | ||
| SELECT * | ||
| FROM t1 | ||
| WHERE t1a in (SELECT min(t2a) | ||
|
||
| FROM t2 | ||
| GROUP by t2c | ||
|
||
| HAVING t2c IN (SELECT max(t3c) | ||
| FROM t3 | ||
| GROUP BY t3b | ||
| HAVING t3b > t2b )) | ||
| ; | ||
|
|
||
| -- TC 01.03 | ||
| -- Invalid due to mixture of outer and local references under an AggregateExpression | ||
| -- in a correlated predicate | ||
| SELECT t1a | ||
| FROM t1 | ||
| GROUP BY 1 | ||
| HAVING EXISTS (SELECT 1 | ||
| FROM t2 | ||
| WHERE t2a < min(t1a + t2a)); | ||
|
|
||
| -- TC 01.04 | ||
| -- Invalid due to mixture of outer and local references under an AggregateExpression | ||
| SELECT t1a | ||
| FROM t1 | ||
| WHERE t1a IN (SELECT t2a | ||
| FROM t2 | ||
| WHERE EXISTS (SELECT 1 | ||
| FROM t3 | ||
| GROUP BY 1 | ||
| HAVING min(t2a + t3a) > 1)); | ||
|
|
||
| -- TC 01.05 | ||
| -- Invalid due to outer reference appearing in projection list | ||
| SELECT t1a | ||
| FROM t1 | ||
| WHERE t1a IN (SELECT t2a | ||
| FROM t2 | ||
| WHERE EXISTS (SELECT min(t2a) | ||
| FROM t3)); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: checkMixedReferencesInsideAggregation -> checkMixedReferencesInsideAggregationExpr
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@viirya Will change it to checkMixedReferencesInsideAggregateExpr