Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ abstract class Optimizer(catalogManager: CatalogManager)
RewriteNonCorrelatedExists,
ComputeCurrentTime,
GetCurrentDatabaseAndCatalog(catalogManager),
RewriteDistinctAggregates,
ReplaceDeduplicateWithAggregate) ::
//////////////////////////////////////////////////////////////////////////////////////////
// Optimizer rules start here
Expand Down Expand Up @@ -196,6 +195,10 @@ abstract class Optimizer(catalogManager: CatalogManager)
EliminateSorts) :+
Batch("Decimal Optimizations", fixedPoint,
DecimalAggregates) :+
// This batch must run after "Decimal Optimizations", as that one may change the
// aggregate distinct column
Batch("Distinct Aggregate Rewrite", Once,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add a comment to say: this batch must be run after "Decimal Optimizations", as "Decimal Optimizations" may change the aggregate distinct column?

RewriteDistinctAggregates) :+
Batch("Object Expressions Optimization", fixedPoint,
EliminateMapObjects,
CombineTypedFilters,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2555,6 +2555,15 @@ class DataFrameSuite extends QueryTest
val df = Seq(0.0 -> -0.0).toDF("pos", "neg")
checkAnswer(df.select($"pos" > $"neg"), Row(false))
}

test("SPARK-32816: aggregating multiple distinct DECIMAL columns") {
spark.range(0, 100, 1, 1)
.selectExpr("id", "cast(id as decimal(9, 0)) as decimal_col")
.createOrReplaceTempView("test_table")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: wrap the test with withTempView

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about writing it like this w/o a temp view;

  test("SPARK-32816: aggregating multiple distinct DECIMAL columns") {
    checkAnswer(
      sql(
        s"""
           |SELECT AVG(DISTINCT decimal_col), SUM(DISTINCT decimal_col)
           |  FROM VALUES (CAST(1 AS DECIMAL(9, 0))) t(decimal_col)
         """.stripMargin),
      Row(XXX, XXX))
  }

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Also, could you move this test into SQLQueryTestSuite?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let me try this, I'm not sure if literal will behave differently

checkAnswer(
sql("select avg(distinct decimal_col), sum(distinct decimal_col) from test_table"),
Row(49.5, 4950))
}
}

case class GroupByKey(a: Int, b: Int)