-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-24424][SQL] Support ANSI-SQL compliant syntax for GROUPING SET #21813
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
b5ada3f
ac8f04f
7cf187d
e0c57f7
2ecf3e1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -442,17 +442,32 @@ class Analyzer( | |
| child: LogicalPlan): LogicalPlan = { | ||
| val gid = AttributeReference(VirtualColumn.groupingIdName, IntegerType, false)() | ||
|
|
||
| val finalGroupByExpressions = if (groupByExprs == Nil) { | ||
| selectedGroupByExprs.flatten.foldLeft(Seq.empty[Expression]) { (result, currentExpr) => | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @viirya No. We should be getting an error as we don't have a group by specification. I had tried this scenario against db2 to double check.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we have a test case for it too?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @viirya Yeah.. was already adding it .. knew u would ask :-)
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @dilipbiswal Thanks! :-) |
||
| // Only unique expressions are included in the group by expressions and is determined | ||
| // based on their semantic equality. Example. grouping sets ((a * b), (b * a)) results | ||
| // in grouping expression (a * b) | ||
| if (result.find(_.semanticEquals(currentExpr)).isDefined) { | ||
| result | ||
| } else { | ||
| result :+ currentExpr | ||
| } | ||
| } | ||
| } else { | ||
| groupByExprs | ||
| } | ||
|
|
||
| // Expand works by setting grouping expressions to null as determined by the | ||
| // `selectedGroupByExprs`. To prevent these null values from being used in an aggregate | ||
| // instead of the original value we need to create new aliases for all group by expressions | ||
| // that will only be used for the intended purpose. | ||
| val groupByAliases = constructGroupByAlias(groupByExprs) | ||
| val groupByAliases = constructGroupByAlias(finalGroupByExpressions) | ||
|
|
||
| val expand = constructExpand(selectedGroupByExprs, child, groupByAliases, gid) | ||
| val groupingAttrs = expand.output.drop(child.output.length) | ||
|
|
||
| val aggregations = constructAggregateExprs( | ||
| groupByExprs, aggregationExprs, groupByAliases, groupingAttrs, gid) | ||
| finalGroupByExpressions, aggregationExprs, groupByAliases, groupingAttrs, gid) | ||
|
|
||
| Aggregate(groupingAttrs, aggregations, expand) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -91,6 +91,40 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest { | |
| assertAnalysisError(originalPlan3, Seq("doesn't show up in the GROUP BY list")) | ||
| } | ||
|
|
||
| test("grouping sets with no explicit group by expressions") { | ||
| val originalPlan = GroupingSets(Seq(Seq(), Seq(unresolved_a), Seq(unresolved_a, unresolved_b)), | ||
| Nil, r1, | ||
| Seq(unresolved_a, unresolved_b, UnresolvedAlias(count(unresolved_c)))) | ||
| val expected = Aggregate(Seq(a, b, gid), Seq(a, b, count(c).as("count(c)")), | ||
| Expand( | ||
| Seq(Seq(a, b, c, nulInt, nulStr, 3), Seq(a, b, c, a, nulStr, 1), Seq(a, b, c, a, b, 0)), | ||
| Seq(a, b, c, a, b, gid), | ||
| Project(Seq(a, b, c, a.as("a"), b.as("b")), r1))) | ||
| checkAnalysis(originalPlan, expected) | ||
|
|
||
| val originalPlan2 = GroupingSets(Seq(Seq(), Seq(unresolved_a), Seq(unresolved_a, unresolved_b)), | ||
| Nil, r1, | ||
| Seq(unresolved_a, unresolved_b, UnresolvedAlias(count(unresolved_c)))) | ||
|
||
| checkAnalysis(originalPlan2, expected) | ||
|
|
||
|
|
||
| // Computation of grouping expression should remove duplicate expression based on their | ||
| // semantics (semanticEqual). | ||
| val originalPlan3 = GroupingSets(Seq(Seq(Multiply(unresolved_a, Literal(2))), | ||
| Seq(Multiply(Literal(2), unresolved_a), unresolved_b)), Nil, r1, | ||
| Seq(UnresolvedAlias(Multiply(unresolved_a, Literal(2))), | ||
| unresolved_b, UnresolvedAlias(count(unresolved_c)))) | ||
|
|
||
| val resultPlan = getAnalyzer(true).executeAndCheck(originalPlan3) | ||
| val gExpressions = resultPlan.asInstanceOf[Aggregate].groupingExpressions | ||
| assert(gExpressions.size == 3) | ||
| val firstGroupingExprAttrName = | ||
| gExpressions(0).asInstanceOf[AttributeReference].name.replaceAll("#[0-9]*", "#0") | ||
| assert(firstGroupingExprAttrName == "(a#0 * 2)") | ||
| assert(gExpressions(1).asInstanceOf[AttributeReference].name == "b") | ||
| assert(gExpressions(2).asInstanceOf[AttributeReference].name == VirtualColumn.groupingIdName) | ||
| } | ||
|
|
||
| test("cube") { | ||
| val originalPlan = Aggregate(Seq(Cube(Seq(unresolved_a, unresolved_b))), | ||
| Seq(unresolved_a, unresolved_b, UnresolvedAlias(count(unresolved_c))), r1) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,5 +13,39 @@ SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a)); | |
| -- SPARK-17849: grouping set throws NPE #3 | ||
| SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c)); | ||
|
|
||
| -- Group sets without explicit group by | ||
| SELECT c1, sum(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1); | ||
|
|
||
| -- Group sets without group by and with grouping | ||
| SELECT c1, sum(c2), grouping(c1) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1); | ||
|
|
||
| -- Mutiple grouping within a grouping set | ||
| SELECT c1, c2, Sum(c3), grouping__id | ||
| FROM (VALUES ('x', 'a', 10), ('y', 'b', 20) ) AS t (c1, c2, c3) | ||
| GROUP BY GROUPING SETS ( ( c1 ), ( c2 ) ) | ||
| HAVING GROUPING__ID > 1; | ||
|
|
||
| -- Group sets without explicit group by | ||
| SELECT grouping(c1) FROM (VALUES ('x', 'a', 10), ('y', 'b', 20)) AS t (c1, c2, c3) GROUP BY c1,c2 GROUPING SETS (c1,c2); | ||
|
||
|
|
||
| -- Mutiple grouping within a grouping set | ||
| SELECT -c1 AS c1 FROM (values (1,2), (3,2)) t(c1, c2) GROUP BY GROUPING SETS ((c1), (c1, c2)); | ||
|
|
||
| -- complex expression in grouping sets | ||
| SELECT a + b, b, sum(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b)); | ||
|
|
||
| -- complex expression in grouping sets | ||
| SELECT a + b, b, sum(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b + a), (b)); | ||
|
|
||
| -- more query constructs with grouping sets | ||
| SELECT c1 AS col1, c2 AS col2 | ||
| FROM (VALUES (1, 2), (3, 2)) t(c1, c2) | ||
| GROUP BY GROUPING SETS ( ( c1 ), ( c1, c2 ) ) | ||
| HAVING col2 IS NOT NULL | ||
| ORDER BY -col1; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've manually verified the results should be correct.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @viirya Sorry Simon.. do i have to do something for this comment ?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not at all. @dilipbiswal :-) |
||
|
|
||
| -- negative tests - must have at least one grouping expression | ||
| SELECT a, b, c, count(d) FROM grouping GROUP BY WITH ROLLUP; | ||
|
|
||
| SELECT a, b, c, count(d) FROM grouping GROUP BY WITH CUBE; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't we do this in the branch of
case x: GroupingSets if x.expressions.forall(_.resolved) =>? I think thisconstructAggregatemethod is also used by other clauses likeCubeandRollup.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@viirya Yeah.. so for cube and rollup, we will always have groupByExprs setup right ? So i felt its better to keep the code consolidated here in this function. What do u think ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok. Mind to add a comment on this like
SPARK-24424: this only happens for ANSI-SQL compliant syntax for GROUPING SET?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@viirya Sure will do.