Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ae1186f
[SPARK-34581][SQL] Don't optimize out grouping expressions from aggre…
peter-toth Mar 21, 2021
5ab9f75
comment fix
peter-toth Mar 21, 2021
2293fd4
move logic to the beginning of optimization, simplify test
peter-toth Mar 22, 2021
3de19ca
regenerate approved plans
peter-toth Mar 22, 2021
04e61c5
Merge branch 'master' into SPARK-34581-keep-grouping-expressions
peter-toth Mar 23, 2021
6e05f14
define GroupingExpression as TaggingExpression
peter-toth Mar 23, 2021
09f1a85
move test to SQLQueryTestSuite
peter-toth Mar 24, 2021
f46b89d
add more explanation
peter-toth Mar 24, 2021
56589a3
Merge commit 'c8233f1be5c2f853f42cda367475eb135a83afd5' into SPARK-34…
peter-toth Mar 26, 2021
ea95bff
Merge commit '3951e3371a83578a81474ed99fb50d59f27aac62' into SPARK-34…
peter-toth Mar 31, 2021
7ea2306
Merge commit '89ae83d19b9652348a685550c2c49920511160d5' into SPARK-34…
peter-toth Apr 1, 2021
468534f
Merge commit '65da9287bc5112564836a555cd2967fc6b05856f' into SPARK-34…
peter-toth Apr 2, 2021
977c0bf
new GroupingExprRef approach
peter-toth Mar 27, 2021
c2ba804
simplify
peter-toth Apr 11, 2021
0622444
minor fixes
peter-toth Apr 12, 2021
343f35e
Merge commit 'e40fce919ab77f5faeb0bbd34dc86c56c04adbaa' into SPARK-34…
peter-toth Apr 12, 2021
2e79eb9
review fixes
peter-toth Apr 13, 2021
cff9b9a
fix latest test failures, add new test case
peter-toth Apr 14, 2021
78296a8
better non-deterministic test case
peter-toth Apr 14, 2021
72c173b
make new rules non excludable
peter-toth Apr 15, 2021
34f0439
Merge branch 'fork/master' into SPARK-34581-keep-grouping-expressions
peter-toth Apr 15, 2021
fb3a19d
fix validConstraints, minor changes
peter-toth Apr 17, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
move test to SQLQueryTestSuite
  • Loading branch information
peter-toth committed Mar 24, 2021
commit 09f1a85d8312e56c886b31f26ae87d245288d2b9
5 changes: 5 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/group-by.sql
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,8 @@ SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(

-- Aggregate with multiple distinct decimal columns
SELECT AVG(DISTINCT decimal_col), SUM(DISTINCT decimal_col) FROM VALUES (CAST(1 AS DECIMAL(9, 0))) t(decimal_col);

-- SPARK-34581: Don't optimize out grouping expressions from aggregate expressions without aggregate function
-- The select list wraps the grouping expression (a IS NULL) in not(...) instead of repeating it verbatim.
SELECT not(a IS NULL), count(*) AS c
FROM testData
GROUP BY a IS NULL;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we also add a test with a non-deterministic expression, e.g. `SELECT a + b + rand() ... GROUP BY a + b`?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in cff9b9a.

13 changes: 12 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/group-by.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 62
-- Number of queries: 63


-- !query
Expand Down Expand Up @@ -642,3 +642,14 @@ SELECT AVG(DISTINCT decimal_col), SUM(DISTINCT decimal_col) FROM VALUES (CAST(1
struct<avg(DISTINCT decimal_col):decimal(13,4),sum(DISTINCT decimal_col):decimal(19,0)>
-- !query output
1.0000 1


-- !query
SELECT not(a IS NULL), count(*) AS c
FROM testData
GROUP BY a IS NULL
-- !query schema
struct<(NOT (a IS NULL)):boolean,c:bigint>
-- !query output
false 2
true 7
14 changes: 0 additions & 14 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4140,20 +4140,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
}
}
}

// Groups by `t.id IS NULL` while selecting `not(t.id IS NULL)`. With three non-null
// and two null ids the expected rows are (true, 3) and (false, 2).
test("SPARK-34581: Don't optimize out grouping expressions from aggregate expressions") {
  withTempView("t") {
    val ids = Seq[Integer](null, 1, 2, 3, null)
    ids.toDF("id").createOrReplaceTempView("t")

    val query =
      """
        |SELECT not(t.id IS NULL), count(*) AS c
        |FROM t
        |GROUP BY t.id IS NULL
        |""".stripMargin
    val expected = Seq(Row(true, 3), Row(false, 2))

    checkAnswer(spark.sql(query), expected)
  }
}
}

case class Foo(bar: Option[String])