Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -509,12 +509,7 @@ class Analyzer(
|| !p.pivotColumn.resolved => p
case Pivot(groupByExprsOpt, pivotColumn, pivotValues, aggregates, child) =>
// Check all aggregate expressions.
aggregates.foreach { e =>
if (!isAggregateExpression(e)) {
throw new AnalysisException(
s"Aggregate expression required for pivot, found '$e'")
}
}
aggregates.foreach(checkValidAggregateExpression)
// Group-by expressions coming from SQL are implicit and need to be deduced.
val groupByExprs = groupByExprsOpt.getOrElse(
(child.outputSet -- aggregates.flatMap(_.references) -- pivotColumn.references).toSeq)
Expand Down Expand Up @@ -586,12 +581,16 @@ class Analyzer(
}
}

private def isAggregateExpression(expr: Expression): Boolean = {
expr match {
case Alias(e, _) => isAggregateExpression(e)
case AggregateExpression(_, _, _, _) => true
case _ => false
}
// TODO: Support Pandas UDF.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment describing this check and explaining what is allowed.

/**
 * Validates a single aggregate expression of a PIVOT clause by walking its expression tree.
 *
 *  - An [[AggregateExpression]] is accepted once the arguments of its aggregate function pass
 *    `checkAggregateFunctionArguments`; this method does not recurse below it.
 *  - An [[Attribute]] reached outside of any [[AggregateExpression]] fails analysis.
 *  - Any other expression is valid when all of its children are valid, so aggregates may be
 *    wrapped in further expressions, e.g. `ceil(sum(earnings))` or `avg(earnings) + 1 AS a1`.
 *
 * @param expr the (sub-)expression to validate
 */
private def checkValidAggregateExpression(expr: Expression): Unit = {
  expr match {
    case agg: AggregateExpression =>
      // Nested aggregates and nondeterministic arguments are rejected by the shared helper.
      val isNestedAggregate: Expression => Boolean = _.isInstanceOf[AggregateExpression]
      checkAggregateFunctionArguments(agg.aggregateFunction, isNestedAggregate)
    case attr: Attribute =>
      // A bare column reference that is not covered by an aggregate function.
      failAnalysis(
        s"Aggregate expression required for pivot, but '${attr.sql}' " +
          s"did not appear in any aggregate function.")
    case other =>
      other.children.foreach(checkValidAggregateExpression)
  }
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,27 @@ trait CheckAnalysis extends PredicateHelper {
case _ => None
}

/**
 * Checks the arguments (children) of an aggregate function and fails analysis when an
 * argument is not acceptable.
 *
 * For each argument, in order, two checks are applied:
 *  1. no expression visited by `foreach` on the argument may satisfy
 *     `isAggregateExpression` (aggregate functions cannot be nested);
 *  2. the argument itself must be deterministic.
 *
 * @param aggFunction the aggregate function whose children are inspected
 * @param isAggregateExpression predicate deciding whether an expression counts as an
 *                              aggregate expression in the caller's context
 */
protected def checkAggregateFunctionArguments(
    aggFunction: Expression,
    isAggregateExpression: Expression => Boolean): Unit = {
  for (argument <- aggFunction.children) {
    // Reject an aggregate function used inside the argument of another one.
    argument.foreach { node =>
      if (isAggregateExpression(node)) {
        failAnalysis(
          s"It is not allowed to use an aggregate function in the argument of " +
            s"another aggregate function. Please use the inner aggregate function " +
            s"in a sub-query.")
      }
    }

    // Reject nondeterministic arguments; this runs only after the nesting check passed.
    val isDeterministic = argument.deterministic
    if (!isDeterministic) {
      failAnalysis(
        s"nondeterministic expression ${argument.sql} should not " +
          s"appear in the arguments of an aggregate function.")
    }
  }
}

private def checkLimitClause(limitExpr: Expression): Unit = {
limitExpr match {
case e if !e.foldable => failAnalysis(
Expand Down Expand Up @@ -171,22 +192,7 @@ trait CheckAnalysis extends PredicateHelper {
case agg: AggregateExpression => agg.aggregateFunction
case udf: PythonUDF => udf
}
aggFunction.children.foreach { child =>
child.foreach {
case expr: Expression if isAggregateExpression(expr) =>
failAnalysis(
s"It is not allowed to use an aggregate function in the argument of " +
s"another aggregate function. Please use the inner aggregate function " +
s"in a sub-query.")
case other => // OK
}

if (!child.deterministic) {
failAnalysis(
s"nondeterministic expression ${expr.sql} should not " +
s"appear in the arguments of an aggregate function.")
}
}
checkAggregateFunctionArguments(aggFunction, isAggregateExpression)
case e: Attribute if groupingExprs.isEmpty =>
// Collect all [[AggregateExpressions]]s.
val aggExprs = aggregateExprs.filter(_.collect {
Expand Down
18 changes: 18 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/pivot.sql
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,21 @@ PIVOT (
sum(earnings)
FOR year IN (2012, 2013)
);

-- pivot with complex aggregate expressions
SELECT * FROM (
SELECT year, course, earnings FROM courseSales
)
PIVOT (
ceil(sum(earnings)), avg(earnings) + 1 as a1
FOR course IN ('dotNET', 'Java')
);

-- pivot with invalid arguments in aggregate expressions
SELECT * FROM (
SELECT year, course, earnings FROM courseSales
)
PIVOT (
sum(avg(earnings))
FOR course IN ('dotNET', 'Java')
);
34 changes: 32 additions & 2 deletions sql/core/src/test/resources/sql-tests/results/pivot.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 13
-- Number of queries: 15


-- !query 0
Expand Down Expand Up @@ -176,7 +176,7 @@ PIVOT (
struct<>
-- !query 11 output
org.apache.spark.sql.AnalysisException
Aggregate expression required for pivot, found 'abs(earnings#x)';
Aggregate expression required for pivot, but 'coursesales.`earnings`' did not appear in any aggregate function.;


-- !query 12
Expand All @@ -192,3 +192,33 @@ struct<>
-- !query 12 output
org.apache.spark.sql.AnalysisException
cannot resolve '`year`' given input columns: [__auto_generated_subquery_name.course, __auto_generated_subquery_name.earnings]; line 4 pos 0


-- !query 13
SELECT * FROM (
SELECT year, course, earnings FROM courseSales
)
PIVOT (
ceil(sum(earnings)), avg(earnings) + 1 as a1
FOR course IN ('dotNET', 'Java')
)
-- !query 13 schema
struct<year:int,dotNET_CEIL(sum(CAST(earnings AS BIGINT))):bigint,dotNET_a1:double,Java_CEIL(sum(CAST(earnings AS BIGINT))):bigint,Java_a1:double>
-- !query 13 output
2012 15000 7501.0 20000 20001.0
2013 48000 48001.0 30000 30001.0


-- !query 14
SELECT * FROM (
SELECT year, course, earnings FROM courseSales
)
PIVOT (
sum(avg(earnings))
FOR course IN ('dotNET', 'Java')
)
-- !query 14 schema
struct<>
-- !query 14 output
org.apache.spark.sql.AnalysisException
It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this test related to this PR? I think the output is the same with or without this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right. I think it's still worth adding such a test for pivot.
But you reminded me that I might not need to check the aggregate function arguments here and leave it to CheckAnalysis since this check is independent of the context and always outputs the same error message. WDYT, @maropu and @gatorsmile ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding this test is just to improve the test coverage. It looks reasonable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But you reminded me that I might not need to check the aggregate function arguments here and leave it to CheckAnalysis since this check is independent of the context and always outputs the same error message.

The general principle in our Analyzer is to do the error handling in CheckAnalysis, unless a better (more readable) error message can be issued from the rule.