simplify

apache · peter-toth · Mar 21, 2021 · Mar 21, 2021 · Mar 22, 2021 · Mar 22, 2021
commit c2ba80457bd86d11ad26311bbc3c42607f33b19a
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -430,7 +430,7 @@ class Analyzer(override val catalogManager: CatalogManager)
 
     def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
       case Aggregate(groups, aggs, child) if child.resolved && hasUnresolvedAlias(aggs) =>
-        Aggregate(groups, assignAliases(aggs), child, false)
+        Aggregate(groups, assignAliases(aggs), child)
 
       case Pivot(groupByOpt, pivotColumn, pivotValues, aggregates, child)
         if child.resolved && groupByOpt.isDefined && hasUnresolvedAlias(groupByOpt.get) =>
@@ -599,7 +599,7 @@ class Analyzer(override val catalogManager: CatalogManager)
       val aggregations = constructAggregateExprs(
         finalGroupByExpressions, aggregationExprs, groupByAliases, groupingAttrs, gid)
 
-      Aggregate(groupingAttrs, aggregations, expand, false)
+      Aggregate(groupingAttrs, aggregations, expand)
     }
 
     private def findGroupingExprs(plan: LogicalPlan): Seq[Expression] = {
@@ -746,15 +746,14 @@ class Analyzer(override val catalogManager: CatalogManager)
             case _ => Alias(pivotColumn, "__pivot_col")()
           }
           val bigGroup = groupByExprs :+ namedPivotCol
-          val firstAgg = Aggregate(bigGroup, bigGroup ++ namedAggExps, child, false)
+          val firstAgg = Aggregate(bigGroup, bigGroup ++ namedAggExps, child)
           val pivotAggs = namedAggExps.map { a =>
             Alias(PivotFirst(namedPivotCol.toAttribute, a.toAttribute, evalPivotValues)
               .toAggregateExpression()
             , "__pivot_" + a.sql)()
           }
           val groupByExprsAttr = groupByExprs.map(_.toAttribute)
-          val secondAgg =
-            Aggregate(groupByExprsAttr, groupByExprsAttr ++ pivotAggs, firstAgg, false)
+          val secondAgg = Aggregate(groupByExprsAttr, groupByExprsAttr ++ pivotAggs, firstAgg)
           val pivotAggAttribute = pivotAggs.map(_.toAttribute)
           val pivotOutputs = pivotValues.zipWithIndex.flatMap { case (value, i) =>
             aggregates.zip(pivotAggAttribute).map { case (aggregate, pivotAtt) =>
@@ -791,7 +790,7 @@ class Analyzer(override val catalogManager: CatalogManager)
               Alias(filteredAggregate, outputName(value, aggregate))()
             }
           }
-          Aggregate(groupByExprs, groupByExprs ++ pivotAggregates, child, false)
+          Aggregate(groupByExprs, groupByExprs ++ pivotAggregates, child)
         }
     }
 
@@ -1407,8 +1406,7 @@ class Analyzer(override val catalogManager: CatalogManager)
         if (a.groupingExpressions.exists(_.isInstanceOf[UnresolvedOrdinal])) {
           throw QueryCompilationErrors.starNotAllowedWhenGroupByOrdinalPositionUsedError()
         } else {
-          a.copy(aggrExprWithGroupingRefs =
-            buildExpandedProjectList(a.aggregateExpressions, a.child))
+          a.copy(aggregateExpressions = buildExpandedProjectList(a.aggregateExpressions, a.child))
         }
       // If the script transformation input contains Stars, expand it.
       case t: ScriptTransformation if containsStar(t.input) =>
@@ -1821,7 +1819,7 @@ class Analyzer(override val catalogManager: CatalogManager)
             throw QueryCompilationErrors.groupByPositionRangeError(index, aggs.size, ordinal)
           case o => o
         }
-        Aggregate(newGroups, aggs, child, false)
+        Aggregate(newGroups, aggs, child)
     }
   }
 
@@ -1919,8 +1917,7 @@ class Analyzer(override val catalogManager: CatalogManager)
             val missingAttrs = (AttributeSet(newExprs) -- a.outputSet).intersect(newChild.outputSet)
             if (missingAttrs.forall(attr => groupExprs.exists(_.semanticEquals(attr)))) {
               // All the missing attributes are grouping expressions, valid case.
-              (newExprs,
-                a.copy(aggrExprWithGroupingRefs = aggExprs ++ missingAttrs, child = newChild))
+              (newExprs, a.copy(aggregateExpressions = aggExprs ++ missingAttrs, child = newChild))
             } else {
               // Need to add non-grouping attributes, invalid case.
               (exprs, a)
@@ -2241,7 +2238,7 @@ class Analyzer(override val catalogManager: CatalogManager)
   object GlobalAggregates extends Rule[LogicalPlan] {
     def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators {
       case Project(projectList, child) if containsAggregates(projectList) =>
-        Aggregate(Nil, projectList, child, false)
+        Aggregate(Nil, projectList, child)
     }
 
     def containsAggregates(exprs: Seq[Expression]): Boolean = {
@@ -2290,7 +2287,7 @@ class Analyzer(override val catalogManager: CatalogManager)
           val aliasedOrdering = unresolvedSortOrders.map(o => Alias(o.child, "aggOrder")())
 
           val aggregateWithExtraOrdering = aggregate.copy(
-            aggrExprWithGroupingRefs = aggregate.aggregateExpressions ++ aliasedOrdering)
+            aggregateExpressions = aggregate.aggregateExpressions ++ aliasedOrdering)
 
           val resolvedAggregate: Aggregate =
             executeSameContext(aggregateWithExtraOrdering).asInstanceOf[Aggregate]
@@ -2344,7 +2341,7 @@ class Analyzer(override val catalogManager: CatalogManager)
           } else {
             Project(aggregate.output,
               Sort(finalSortOrders, global,
-                aggregate.copy(aggrExprWithGroupingRefs = originalAggExprs ++ needsPushDown)))
+                aggregate.copy(aggregateExpressions = originalAggExprs ++ needsPushDown)))
           }
         } catch {
           // Attempting to resolve in the aggregate can result in ambiguity.  When this happens,
@@ -2371,8 +2368,7 @@ class Analyzer(override val catalogManager: CatalogManager)
           Aggregate(
             agg.groupingExpressions,
             Alias(filterCond, "havingCondition")() :: Nil,
-            agg.child,
-            false)
+            agg.child)
         val resolvedOperator = executeSameContext(aggregatedCondition)
         def resolvedAggregateFilter =
           resolvedOperator
@@ -2427,7 +2423,7 @@ class Analyzer(override val catalogManager: CatalogManager)
         val (aggregateExpressions, resolvedHavingCond) = resolvedInfo.get
         Project(agg.output,
           Filter(resolvedHavingCond,
-            agg.copy(aggrExprWithGroupingRefs = agg.aggregateExpressions ++ aggregateExpressions)))
+            agg.copy(aggregateExpressions = agg.aggregateExpressions ++ aggregateExpressions)))
       } else {
         filter
       }
@@ -2557,7 +2553,7 @@ class Analyzer(override val catalogManager: CatalogManager)
               other :: Nil
           }
 
-        val newAgg = Aggregate(groupList, newAggList, child, false)
+        val newAgg = Aggregate(groupList, newAggList, child)
         Project(projectExprs.toList, newAgg)
 
       case p @ Project(projectList, _) if hasAggFunctionInGenerator(projectList) =>
@@ -2867,7 +2863,7 @@ class Analyzer(override val catalogManager: CatalogManager)
           a.expressions.forall(_.resolved) =>
         val (windowExpressions, aggregateExpressions) = extract(aggregateExprs)
         // Create an Aggregate operator to evaluate aggregation functions.
-        val withAggregate = Aggregate(groupingExprs, aggregateExpressions, child, false)
+        val withAggregate = Aggregate(groupingExprs, aggregateExpressions, child)
         // Add a Filter operator for conditions in the Having clause.
         val withFilter = Filter(condition, withAggregate)
         val withWindow = addWindow(windowExpressions, withFilter)
@@ -2884,7 +2880,7 @@ class Analyzer(override val catalogManager: CatalogManager)
           a.expressions.forall(_.resolved) =>
         val (windowExpressions, aggregateExpressions) = extract(aggregateExprs)
         // Create an Aggregate operator to evaluate aggregation functions.
-        val withAggregate = Aggregate(groupingExprs, aggregateExpressions, child, false)
+        val withAggregate = Aggregate(groupingExprs, aggregateExpressions, child)
         // Add Window operators.
         val withWindow = addWindow(windowExpressions, withAggregate)
 
@@ -3542,7 +3538,7 @@ object CleanupAliases extends Rule[LogicalPlan] with AliasHelper {
 
     case Aggregate(grouping, aggs, child) =>
       val cleanedAggs = aggs.map(trimNonTopLevelAliases)
-      Aggregate(grouping.map(trimAliases), cleanedAggs, child, false)
+      Aggregate(grouping.map(trimAliases), cleanedAggs, child)
 
     case Window(windowExprs, partitionSpec, orderSpec, child) =>
       val cleanedWindowExprs = windowExprs.map(trimNonTopLevelAliases)

diff --git a/...catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala b/...catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala
@@ -167,7 +167,7 @@ object DeduplicateRelations extends Rule[LogicalPlan] {
       case oldVersion @ Aggregate(_, aggregateExpressions, _)
           if findAliases(aggregateExpressions).intersect(conflictingAttributes).nonEmpty =>
         Seq((oldVersion, oldVersion.copy(
-          aggrExprWithGroupingRefs = newAliases(aggregateExpressions))))
+          aggregateExpressions = newAliases(aggregateExpressions))))
 
       // We don't search the child plan recursively for the same reason as the above Project.
       case _ @ Aggregate(_, aggregateExpressions, _)

diff --git a/...t/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/...t/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
@@ -107,8 +107,7 @@ object UnsupportedOperationChecker extends Logging {
         // Since the Distinct node will be replaced to Aggregate in the optimizer rule
         // [[ReplaceDistinctWithAggregate]], here we also need to check all Distinct node by
         // assuming it as Aggregate.
-        case d @ Distinct(c: LogicalPlan) if d.isStreaming =>
-          Aggregate(c.output, c.output, c, false)
+        case d @ Distinct(c: LogicalPlan) if d.isStreaming => Aggregate(c.output, c.output, c)
       }
     }
 

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UpdateNullability.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UpdateNullability.scala
@@ -62,12 +62,12 @@ object UpdateGroupingExprRefNullability extends Rule[LogicalPlan] {
     case a: Aggregate =>
       val nullabilities = a.groupingExpressions.map(_.nullable).toArray
 
-      val newAggrExprWithGroupingRefs =
-        a.aggrExprWithGroupingRefs.map(_.transform {
+      val newAggregateExpressions =
+        a.aggregateExpressions.map(_.transform {
           case g: GroupingExprRef if g.nullable != nullabilities(g.ordinal) =>
             g.copy(nullable = nullabilities(g.ordinal))
         }.asInstanceOf[NamedExpression])
 
-      a.copy(aggrExprWithGroupingRefs = newAggrExprWithGroupingRefs)
+      a.copy(aggregateExpressions = newAggregateExpressions)
   }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -28,7 +28,6 @@ import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.expressions.objects.Invoke
-import org.apache.spark.sql.catalyst.optimizer.EnforceGroupingReferencesInAggregates
 import org.apache.spark.sql.catalyst.plans.{Inner, JoinType}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.types._
@@ -408,7 +407,7 @@ package object dsl {
           case ne: NamedExpression => ne
           case e => Alias(e, e.toString)()
         }
-        Aggregate(groupingExprs, aliasedExprs, logicalPlan, false)
+        Aggregate(groupingExprs, aliasedExprs, logicalPlan)
       }
 
       def having(
@@ -467,7 +466,7 @@ package object dsl {
       def analyze: LogicalPlan = {
         val analyzed = analysis.SimpleAnalyzer.execute(logicalPlan)
         analysis.SimpleAnalyzer.checkAnalysis(analyzed)
-        EnforceGroupingReferencesInAggregates(EliminateSubqueryAliases(analyzed))
+        EliminateSubqueryAliases(analyzed)
       }
 
       def hint(name: String, parameters: Any*): LogicalPlan =

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala
@@ -35,7 +35,7 @@ trait AliasHelper {
   protected def getAliasMap(plan: Aggregate): AttributeMap[Alias] = {
     // Find all the aliased expressions in the aggregate list that don't include any actual
     // AggregateExpression or PythonUDF, and create a map from the alias to the expression
-    val aliasMap = plan.aggregateExpressions.collect {
+    val aliasMap = plan.aggregateExpressionsWithoutGroupingRefs.collect {
       case a: Alias if a.child.find(e => e.isInstanceOf[AggregateExpression] ||
         PythonUDF.isGroupedAggPandasUDF(e)).isEmpty =>
         (a.toAttribute, a)

diff --git a/...scala/org/apache/spark/sql/catalyst/optimizer/EnforceGroupingReferencesInAggregates.scala b/...scala/org/apache/spark/sql/catalyst/optimizer/EnforceGroupingReferencesInAggregates.scala
@@ -27,8 +27,8 @@ import org.apache.spark.sql.catalyst.rules.Rule
 object EnforceGroupingReferencesInAggregates extends Rule[LogicalPlan] {
   override def apply(plan: LogicalPlan): LogicalPlan = {
     plan transform {
-      case a: Aggregate if !a.enforceGroupingReferences =>
-        Aggregate(a.groupingExpressions, a.aggrExprWithGroupingRefs, a.child)
+      case a: Aggregate =>
+        Aggregate.withGroupingRefs(a.groupingExpressions, a.aggregateExpressions, a.child)
     }
   }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -508,7 +508,7 @@ object RemoveRedundantAggregates extends Rule[LogicalPlan] with AliasHelper {
     case upper @ Aggregate(_, _, lower: Aggregate) if lowerIsRedundant(upper, lower) =>
       val aliasMap = getAliasMap(lower)
 
-      val newAggregate = Aggregate(
+      val newAggregate = Aggregate.withGroupingRefs(
         child = lower.child,
         groupingExpressions = upper.groupingExpressions.map(replaceAlias(_, aliasMap)),
         aggregateExpressions = upper.aggregateExpressions.map(
@@ -752,8 +752,8 @@ object ColumnPruning extends Rule[LogicalPlan] {
     case p @ Project(_, p2: Project) if !p2.outputSet.subsetOf(p.references) =>
       p.copy(child = p2.copy(projectList = p2.projectList.filter(p.references.contains)))
     case p @ Project(_, a: Aggregate) if !a.outputSet.subsetOf(p.references) =>
-      p.copy(child =
-        a.copy(aggrExprWithGroupingRefs = a.aggrExprWithGroupingRefs.filter(p.references.contains)))
+      p.copy(
+        child = a.copy(aggregateExpressions = a.aggregateExpressions.filter(p.references.contains)))
     case a @ Project(_, e @ Expand(_, _, grandChild)) if !e.outputSet.subsetOf(a.references) =>
       val newOutput = e.output.filter(a.references.contains(_))
       val newProjects = e.projections.map { proj =>
@@ -879,8 +879,8 @@ object CollapseProject extends Rule[LogicalPlan] with AliasHelper {
       if (haveCommonNonDeterministicOutput(p.projectList, agg.aggregateExpressions)) {
         p
       } else {
-        Aggregate(agg.groupingExpressions,
-          buildCleanedProjectList(p.projectList, agg.aggregateExpressions), agg.child)
+        agg.copy(aggregateExpressions = buildCleanedProjectList(
+          p.projectList, agg.aggregateExpressions))
       }
     case Project(l1, g @ GlobalLimit(_, limit @ LocalLimit(_, p2 @ Project(l2, _))))
         if isRenaming(l1, l2) =>
@@ -1250,7 +1250,6 @@ object EliminateSorts extends Rule[LogicalPlan] {
 
     def checkValidAggregateExpression(expr: Expression): Boolean = expr match {
       case _: AttributeReference => true
-      case _: GroupingExprRef => true
       case ae: AggregateExpression => isOrderIrrelevantAggFunction(ae.aggregateFunction)
       case _: UserDefinedExpression => false
       case e => e.children.forall(checkValidAggregateExpression)
@@ -1985,15 +1984,15 @@ object RemoveLiteralFromGroupExpressions extends Rule[LogicalPlan] {
         val droppedGroupsBefore =
           grouping.scanLeft(0)((n, e) => n + (if (e.foldable) 1 else 0)).toArray
 
-        val newAggrExprWithGroupingReferences =
-          a.aggrExprWithGroupingRefs.map(_.transform {
+        val newAggregateExpressions =
+          a.aggregateExpressions.map(_.transform {
             case g: GroupingExprRef if droppedGroupsBefore(g.ordinal) > 0 =>
               g.copy(ordinal = g.ordinal - droppedGroupsBefore(g.ordinal))
           }.asInstanceOf[NamedExpression])
 
-          a.copy(
-            groupingExpressions = newGrouping,
-            aggrExprWithGroupingRefs = newAggrExprWithGroupingReferences)
+        a.copy(
+          groupingExpressions = newGrouping,
+          aggregateExpressions = newAggregateExpressions)
       } else {
         // All grouping expressions are literals. We should not drop them all, because this can
         // change the return semantics when the input of the Aggregate is empty (SPARK-17114). We
@@ -2024,15 +2023,15 @@ object RemoveRepetitionFromGroupExpressions extends Rule[LogicalPlan] {
           })
         ).toArray
 
-        val newAggrExprWithGroupingReferences =
-          a.aggrExprWithGroupingRefs.map(_.transform {
+        val newAggregateExpressions =
+          a.aggregateExpressions.map(_.transform {
             case g: GroupingExprRef if droppedGroupsBefore(g.ordinal) > 0 =>
               g.copy(ordinal = g.ordinal - droppedGroupsBefore(g.ordinal))
           }.asInstanceOf[NamedExpression])
 
         a.copy(
           groupingExpressions = newGrouping,
-          aggrExprWithGroupingRefs = newAggrExprWithGroupingReferences)
+          aggregateExpressions = newAggregateExpressions)
       }
   }
 }

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
@@ -612,9 +612,10 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] with AliasHelpe
    * subqueries.
    */
   def apply(plan: LogicalPlan): LogicalPlan = plan transformUpWithNewOutput {
-    case a @ Aggregate(grouping, expressions, child) =>
+    case a @ Aggregate(grouping, _, child) =>
       val subqueries = ArrayBuffer.empty[ScalarSubquery]
-      val rewriteExprs = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries))
+      val rewriteExprs = a.aggregateExpressionsWithoutGroupingRefs
+        .map(extractCorrelatedScalarSubqueries(_, subqueries))
       if (subqueries.nonEmpty) {
         // We currently only allow correlated subqueries in an aggregate if they are part of the
         // grouping expressions. As a result we need to replace all the scalar subqueries in the

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -726,7 +726,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
         Filter(predicate, createProject())
       } else {
         // According to SQL standard, HAVING without GROUP BY means global aggregate.
-        withHavingClause(havingClause, Aggregate(Nil, namedExpressions, withFilter, false))
+        withHavingClause(havingClause, Aggregate(Nil, namedExpressions, withFilter))
       }
     } else if (aggregationClause != null) {
       val aggregate = withAggregationClause(aggregationClause, namedExpressions, withFilter)
@@ -924,7 +924,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
         val groupingSets =
           ctx.groupingSet.asScala.map(_.expression.asScala.map(e => expression(e)).toSeq)
         Aggregate(Seq(GroupingSets(groupingSets.toSeq, groupByExpressions)),
-          selectExpressions, query, false)
+          selectExpressions, query)
       } else {
         // GROUP BY .... (WITH CUBE | WITH ROLLUP)?
         val mappedGroupByExpressions = if (ctx.CUBE != null) {
@@ -934,7 +934,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
         } else {
           groupByExpressions
         }
-        Aggregate(mappedGroupByExpressions, selectExpressions, query, false)
+        Aggregate(mappedGroupByExpressions, selectExpressions, query)
       }
     } else {
       val groupByExpressions =
@@ -978,7 +978,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
           "`GROUP BY CUBE(a, b), ROLLUP(a, c)` is not supported.",
           ctx)
       }
-      Aggregate(groupByExpressions.toSeq, selectExpressions, query, false)
+      Aggregate(groupByExpressions.toSeq, selectExpressions, query)
     }
   }