-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-15752] [SQL] Optimize metadata only query that has an aggregate whose children are deterministic project or filter operators. #13494
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
2ca2c38
edea710
8426522
153293e
7dfb743
68e6d6d
595ef36
7d7ece0
2e55a9d
b2b6eba
c5a291e
6404c1f
1bb5812
7e3729e
fbf5d61
3411fd6
aefab7f
c5ccdea
ae6cf9f
159331b
3a1438b
c0a7d59
a4045ca
0a023e7
a9b38ab
a5ea995
1bed08d
a22e962
41fef2c
bd53678
88f7308
2568193
26a97f4
4297f9f
1a65aa7
d5e0df4
9d6dd76
9cb01d8
3e2687d
2b4faf3
88fd3bf
a894bb7
9546b40
85b695b
bcfe8e5
67211be
501f93b
8ee2a8c
d888c85
ff16509
358ad13
030776a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1692,66 +1692,68 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { | |
|
|
||
| test("spark-15752 metadata only optimizer for hive table") { | ||
| withSQLConf(SQLConf.OPTIMIZER_METADATA_ONLY.key -> "true") { | ||
| val df = Seq((1, "2"), (3, "4")).toDF("key", "value") | ||
| df.createOrReplaceTempView("data_15752") | ||
| sql( | ||
| """ | ||
| |CREATE TABLE srcpart_15752 (key INT, value STRING) | ||
| |PARTITIONED BY (ds STRING, hr INT) STORED AS parquet | ||
| """.stripMargin) | ||
| for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq(11, 12)) { | ||
| withTable("data_15752", "srcpart_15752", "srctext_15752") { | ||
| val df = Seq((1, "2"), (3, "4")).toDF("key", "value") | ||
| df.createOrReplaceTempView("data_15752") | ||
| sql( | ||
| s""" | ||
| |INSERT OVERWRITE TABLE srcpart_15752 PARTITION (ds='$ds',hr='$hr') | ||
| |select key, value from data_15752 | ||
| """.stripMargin) | ||
| } | ||
| checkAnswer(sql("select hr from srcpart_15752 where hr = 11 group by hr"), Row(11)) | ||
| checkAnswer(sql("select max(hr) from srcpart_15752"), Row(12)) | ||
| checkAnswer(sql("select max(hr) from srcpart_15752 where hr = 11"), Row(11)) | ||
| checkAnswer(sql("select max(hr) from (select hr from srcpart_15752) t"), Row(12)) | ||
| checkAnswer( | ||
| sql("select max(x) from (select hr + 1 as x from srcpart_15752 where hr = 12) t"), | ||
| Row(13)) | ||
| checkAnswer(sql("select distinct hr from srcpart_15752"), Row(11) :: Row(12) :: Nil) | ||
| checkAnswer(sql("select distinct hr from srcpart_15752 where hr = 11"), Row(11)) | ||
| checkAnswer( | ||
| sql("select distinct x from (select hr + 1 as x from srcpart_15752 where hr = 12) t"), | ||
| Row(13)) | ||
| """ | ||
| |CREATE TABLE srcpart_15752 (key INT, value STRING) | ||
| |PARTITIONED BY (ds STRING, hr INT) STORED AS parquet | ||
| """.stripMargin) | ||
| for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq(11, 12)) { | ||
| sql( | ||
| s""" | ||
| |INSERT OVERWRITE TABLE srcpart_15752 PARTITION (ds='$ds',hr='$hr') | ||
| |select key, value from data_15752 | ||
| """.stripMargin) | ||
| } | ||
| checkAnswer(sql("select hr from srcpart_15752 where hr = 11 group by hr"), Row(11)) | ||
| checkAnswer(sql("select max(hr) from srcpart_15752"), Row(12)) | ||
| checkAnswer(sql("select max(hr) from srcpart_15752 where hr = 11"), Row(11)) | ||
| checkAnswer(sql("select max(hr) from (select hr from srcpart_15752) t"), Row(12)) | ||
| checkAnswer( | ||
| sql("select max(x) from (select hr + 1 as x from srcpart_15752 where hr = 12) t"), | ||
| Row(13)) | ||
| checkAnswer(sql("select distinct hr from srcpart_15752"), Row(11) :: Row(12) :: Nil) | ||
| checkAnswer(sql("select distinct hr from srcpart_15752 where hr = 11"), Row(11)) | ||
| checkAnswer( | ||
| sql("select distinct x from (select hr + 1 as x from srcpart_15752 where hr = 12) t"), | ||
| Row(13)) | ||
|
|
||
| // The metadata-only optimizer does not yet support the following queries | ||
| checkAnswer( | ||
| sql("select hr from srcpart_15752 where hr = 12 group by rollup(hr)"), | ||
| Row(null) :: Row(12) :: Nil) | ||
| checkAnswer( | ||
| sql("select hr from (select hr from srcpart_15752 where hr = 11 union all " + | ||
| "select hr from srcpart_15752 where hr= 12)t group by hr"), | ||
| Row(11) :: Row(12) :: Nil) | ||
| // The metadata-only optimizer does not yet support the following queries |
|
||
| checkAnswer( | ||
| sql("select hr from srcpart_15752 where hr = 12 group by rollup(hr)"), | ||
| Row(null) :: Row(12) :: Nil) | ||
| checkAnswer( | ||
| sql("select hr from (select hr from srcpart_15752 where hr = 11 union all " + | ||
| "select hr from srcpart_15752 where hr= 12)t group by hr"), | ||
| Row(11) :: Row(12) :: Nil) | ||
|
|
||
| sql( | ||
| """ | ||
| |CREATE TABLE srctext_15752 (key INT, value STRING) | ||
| |PARTITIONED BY (ds STRING, hr INT) STORED AS textfile | ||
| """.stripMargin) | ||
| for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq(11, 12)) { | ||
| sql( | ||
| s""" | ||
| |INSERT OVERWRITE TABLE srctext_15752 PARTITION (ds='$ds',hr='$hr') | ||
| |select key, value from data_15752 | ||
| """.stripMargin) | ||
| """ | ||
| |CREATE TABLE srctext_15752 (key INT, value STRING) | ||
| |PARTITIONED BY (ds STRING, hr INT) STORED AS textfile | ||
| """.stripMargin) | ||
| for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq(11, 12)) { | ||
| sql( | ||
| s""" | ||
| |INSERT OVERWRITE TABLE srctext_15752 PARTITION (ds='$ds',hr='$hr') | ||
| |select key, value from data_15752 | ||
| """.stripMargin) | ||
| } | ||
| checkAnswer(sql("select hr from srctext_15752 where hr = 11 group by hr"), Row(11)) | ||
| checkAnswer(sql("select max(hr) from srctext_15752"), Row(12)) | ||
| checkAnswer(sql("select max(hr) from srctext_15752 where hr = 11"), Row(11)) | ||
| checkAnswer(sql("select max(hr) from (select hr from srctext_15752) t"), Row(12)) | ||
| checkAnswer( | ||
| sql("select max(x) from (select hr + 1 as x from srctext_15752 where hr = 12) t"), | ||
| Row(13)) | ||
| checkAnswer(sql("select distinct hr from srctext_15752"), Row(11) :: Row(12) :: Nil) | ||
| checkAnswer(sql("select distinct hr from srctext_15752 where hr = 11"), Row(11)) | ||
| checkAnswer( | ||
| sql("select distinct x from (select hr + 1 as x from srctext_15752 where hr = 12) t"), | ||
| Row(13)) | ||
| } | ||
| checkAnswer(sql("select hr from srctext_15752 where hr = 11 group by hr"), Row(11)) | ||
| checkAnswer(sql("select max(hr) from srctext_15752"), Row(12)) | ||
| checkAnswer(sql("select max(hr) from srctext_15752 where hr = 11"), Row(11)) | ||
| checkAnswer(sql("select max(hr) from (select hr from srctext_15752) t"), Row(12)) | ||
| checkAnswer( | ||
| sql("select max(x) from (select hr + 1 as x from srctext_15752 where hr = 12) t"), | ||
| Row(13)) | ||
| checkAnswer(sql("select distinct hr from srctext_15752"), Row(11) :: Row(12) :: Nil) | ||
| checkAnswer(sql("select distinct hr from srctext_15752 where hr = 11"), Row(11)) | ||
| checkAnswer( | ||
| sql("select distinct x from (select hr + 1 as x from srctext_15752 where hr = 12) t"), | ||
| Row(13)) | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The session is shared among all test suites, so we should drop the table after all tests here, or we may pollute other test suites.