Handle case of one distinct grouping with superficially different function children to Spark strategies
apache · bersprockets · Mar 11, 2022 · Mar 11, 2022 · Mar 18, 2022 · Mar 18, 2022
commit 9938252d65861651601cef2db24ea12fa5a1ce16
diff --git a/...st/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala b/...st/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala
@@ -405,28 +405,7 @@ object RewriteDistinctAggregates extends Rule[LogicalPlan] {
       }
       Aggregate(groupByAttrs, patchedAggExpressions, firstAggregate)
     } else {
-      // We may have one distinct group only because we grouped using ExpressionSet.
-      // To prevent SparkStrategies from complaining during sanity check, we need to check whether
-      // the original list of aggregate expressions had multiple distinct groups and, if so,
-      // patch that list so we have only one distinct group.
-      val funcChildren = distinctAggs.flatMap { e =>
-        e.aggregateFunction.children.filter(!_.foldable)
-      }
-      val funcChildrenLookup = funcChildren.map { e =>
-        (e, funcChildren.find(fc => e.semanticEquals(fc)).getOrElse(e))
-      }.toMap
-
-      if (funcChildrenLookup.keySet.size > funcChildrenLookup.values.toSet.size) {
-        val patchedAggExpressions = a.aggregateExpressions.map { e =>
-          e.transformDown {
-            case e: Expression =>
-              funcChildrenLookup.getOrElse(e, e)
-          }.asInstanceOf[NamedExpression]
-        }
-        a.copy(aggregateExpressions = patchedAggExpressions)
-      } else {
-        a
-      }
+      a
     }
   }
 

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -527,8 +527,10 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
 
         val (functionsWithDistinct, functionsWithoutDistinct) =
           aggregateExpressions.partition(_.isDistinct)
-        if (functionsWithDistinct.map(
-          _.aggregateFunction.children.filterNot(_.foldable).toSet).distinct.length > 1) {
+        val distinctAggChildSets = functionsWithDistinct.map { ae =>
+          ExpressionSet(ae.aggregateFunction.children.filterNot(_.foldable))
+        }.distinct
+        if (distinctAggChildSets.length > 1) {
           // This is a sanity check. We should not reach here when we have multiple distinct
           // column sets. Our `RewriteDistinctAggregates` should take care this case.
           throw new IllegalStateException(
@@ -560,7 +562,8 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
             // [COUNT(DISTINCT bar), COUNT(DISTINCT foo)] is disallowed because those two distinct
             // aggregates have different column expressions.
             val distinctExpressions =
-              functionsWithDistinct.head.aggregateFunction.children.filterNot(_.foldable)
+            functionsWithDistinct.flatMap(
+              _.aggregateFunction.children.filterNot(_.foldable)).distinct
             val normalizedNamedDistinctExpressions = distinctExpressions.map { e =>
               // Ideally this should be done in `NormalizeFloatingNumbers`, but we do it here
               // because `distinctExpressions` is not extracted during logical phase.