-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-18572][SQL] Add a method listPartitionNames to ExternalCatalog
#15998
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from 1 commit
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
826dee6
[SPARK-18572][SQL] Add a method `listPartitionNames` to `ExternalCatalog`
54171ad
Make `ExternalCatalog.listPartitionNames` abstract and implement that
93cee97
Insert a couple of cosmetic newlines
27dc672
Modify `HiveCommandSuite` to make it test partition columns with uppercase characters in their names
2a7b062
Formatting and code commenting
d183946
Code documentation and partition name/value path name escaping
9c71521
Add unit tests for the new `ExternalCatalog.listPartitionNames` method
b9dd303
Revert modifications to the HiveCommandSuite
28563d4
Explicitly sort the results from
860d985
Build and use a map of lower-cased partition column names to exact
fc57f23
Add tests for `SessionCatalog.listPartitionNames` and enhance coverage
37fc595
Fix something in DataSourceStrategy.scala. Patch provided by @gatorsmile
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Modify `HiveCommandSuite` to make it test partition columns with uppercase characters in their names
- Loading branch information
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,25 +53,28 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto | |
| |TBLPROPERTIES('prop1Key'="prop1Val", '`prop2Key`'="prop2Val") | ||
| """.stripMargin) | ||
| sql("CREATE TABLE parquet_tab3(col1 int, `col 2` int)") | ||
| sql("CREATE TABLE parquet_tab4 (price int, qty int) partitioned by (year int, month int)") | ||
| sql("INSERT INTO parquet_tab4 PARTITION(year = 2015, month = 1) SELECT 1, 1") | ||
| sql("INSERT INTO parquet_tab4 PARTITION(year = 2015, month = 2) SELECT 2, 2") | ||
| sql("INSERT INTO parquet_tab4 PARTITION(year = 2016, month = 2) SELECT 3, 3") | ||
| sql("INSERT INTO parquet_tab4 PARTITION(year = 2016, month = 3) SELECT 3, 3") | ||
|
|
||
| // NB: some table partition column names in this test suite have upper-case characters to test | ||
| // column name case preservation. Do not lowercase these partition names without good reason. | ||
| sql("CREATE TABLE parquet_tab4 (price int, qty int) partitioned by (Year int, Month int)") | ||
| sql("INSERT INTO parquet_tab4 PARTITION(Year = 2015, Month = 1) SELECT 1, 1") | ||
| sql("INSERT INTO parquet_tab4 PARTITION(Year = 2015, Month = 2) SELECT 2, 2") | ||
| sql("INSERT INTO parquet_tab4 PARTITION(Year = 2016, Month = 2) SELECT 3, 3") | ||
| sql("INSERT INTO parquet_tab4 PARTITION(Year = 2016, Month = 3) SELECT 3, 3") | ||
| sql( | ||
| """ | ||
| |CREATE TABLE parquet_tab5 (price int, qty int) | ||
| |PARTITIONED BY (year int, month int, hour int, minute int, sec int, extra int) | ||
| |PARTITIONED BY (Year int, Month int, hour int, minute int, sec int, extra int) | ||
| """.stripMargin) | ||
| sql( | ||
| """ | ||
| |INSERT INTO parquet_tab5 | ||
| |PARTITION(year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 | ||
| |PARTITION(Year = 2016, Month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 | ||
| """.stripMargin) | ||
| sql( | ||
| """ | ||
| |INSERT INTO parquet_tab5 | ||
| |PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 | ||
| |PARTITION(Year = 2016, Month = 4, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 | ||
| """.stripMargin) | ||
| sql("CREATE VIEW parquet_view1 as select * from parquet_tab4") | ||
| } | ||
|
|
@@ -183,7 +186,7 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto | |
| sql( | ||
| """ | ||
| |CREATE TABLE part_table (employeeID INT, employeeName STRING) | ||
| |PARTITIONED BY (c STRING, d STRING) | ||
| |PARTITIONED BY (C STRING, d STRING) | ||
| |ROW FORMAT DELIMITED | ||
| |FIELDS TERMINATED BY '|' | ||
| |LINES TERMINATED BY '\n' | ||
|
|
@@ -195,24 +198,24 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto | |
| } | ||
|
|
||
| intercept[AnalysisException] { | ||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(c="1")""") | ||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(C="1")""") | ||
| } | ||
| intercept[AnalysisException] { | ||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(d="1")""") | ||
| } | ||
| intercept[AnalysisException] { | ||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(c="1", k="2")""") | ||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(C="1", k="2")""") | ||
| } | ||
|
|
||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(c="1", d="2")""") | ||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(C="1", d="2")""") | ||
| checkAnswer( | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE c = '1' AND d = '2'"), | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE C = '1' AND d = '2'"), | ||
| sql("SELECT * FROM non_part_table").collect()) | ||
|
|
||
| // Different order of partition columns. | ||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(d="1", c="2")""") | ||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(d="1", C="2")""") | ||
| checkAnswer( | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE c = '2' AND d = '1'"), | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE C = '2' AND d = '1'"), | ||
| sql("SELECT * FROM non_part_table").collect()) | ||
| } | ||
| } | ||
|
|
@@ -296,38 +299,38 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto | |
| sql( | ||
| """ | ||
| |CREATE TABLE part_table (employeeID INT, employeeName STRING) | ||
| |PARTITIONED BY (c STRING, d STRING) | ||
| |PARTITIONED BY (C STRING, d STRING) | ||
| |ROW FORMAT DELIMITED | ||
| |FIELDS TERMINATED BY '|' | ||
| |LINES TERMINATED BY '\n' | ||
| """.stripMargin) | ||
|
|
||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(c="1", d="1")""") | ||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(C="1", d="1")""") | ||
| checkAnswer( | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE c = '1' AND d = '1'"), | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE C = '1' AND d = '1'"), | ||
| testResults) | ||
|
|
||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(c="1", d="2")""") | ||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(C="1", d="2")""") | ||
| checkAnswer( | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE c = '1' AND d = '2'"), | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE C = '1' AND d = '2'"), | ||
| testResults) | ||
|
|
||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(c="2", d="2")""") | ||
| sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table PARTITION(C="2", d="2")""") | ||
| checkAnswer( | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE c = '2' AND d = '2'"), | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE C = '2' AND d = '2'"), | ||
| testResults) | ||
|
|
||
| sql("TRUNCATE TABLE part_table PARTITION(c='1', d='1')") | ||
| sql("TRUNCATE TABLE part_table PARTITION(C='1', d='1')") | ||
| checkAnswer( | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE c = '1' AND d = '1'"), | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE C = '1' AND d = '1'"), | ||
| Seq.empty[Row]) | ||
| checkAnswer( | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE c = '1' AND d = '2'"), | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE C = '1' AND d = '2'"), | ||
| testResults) | ||
|
|
||
| sql("TRUNCATE TABLE part_table PARTITION(c='1')") | ||
| sql("TRUNCATE TABLE part_table PARTITION(C='1')") | ||
| checkAnswer( | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE c = '1'"), | ||
| sql("SELECT employeeID, employeeName FROM part_table WHERE C = '1'"), | ||
| Seq.empty[Row]) | ||
|
|
||
| sql("TRUNCATE TABLE part_table") | ||
|
|
@@ -341,40 +344,40 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto | |
| test("show partitions - show everything") { | ||
| checkAnswer( | ||
| sql("show partitions parquet_tab4"), | ||
| Row("year=2015/month=1") :: | ||
| Row("year=2015/month=2") :: | ||
| Row("year=2016/month=2") :: | ||
| Row("year=2016/month=3") :: Nil) | ||
| Row("Year=2015/Month=1") :: | ||
| Row("Year=2015/Month=2") :: | ||
| Row("Year=2016/Month=2") :: | ||
| Row("Year=2016/Month=3") :: Nil) | ||
|
|
||
| checkAnswer( | ||
| sql("show partitions default.parquet_tab4"), | ||
| Row("year=2015/month=1") :: | ||
| Row("year=2015/month=2") :: | ||
| Row("year=2016/month=2") :: | ||
| Row("year=2016/month=3") :: Nil) | ||
| Row("Year=2015/Month=1") :: | ||
| Row("Year=2015/Month=2") :: | ||
| Row("Year=2016/Month=2") :: | ||
| Row("Year=2016/Month=3") :: Nil) | ||
| } | ||
|
|
||
| test("show partitions - show everything more than 5 part keys") { | ||
| checkAnswer( | ||
| sql("show partitions parquet_tab5"), | ||
| Row("year=2016/month=3/hour=10/minute=10/sec=10/extra=1") :: | ||
| Row("year=2016/month=4/hour=10/minute=10/sec=10/extra=1") :: Nil) | ||
| Row("Year=2016/Month=3/hour=10/minute=10/sec=10/extra=1") :: | ||
| Row("Year=2016/Month=4/hour=10/minute=10/sec=10/extra=1") :: Nil) | ||
| } | ||
|
|
||
| test("show partitions - filter") { | ||
| checkAnswer( | ||
| sql("show partitions default.parquet_tab4 PARTITION(year=2015)"), | ||
| Row("year=2015/month=1") :: | ||
| Row("year=2015/month=2") :: Nil) | ||
| sql("show partitions default.parquet_tab4 PARTITION(Year=2015)"), | ||
| Row("Year=2015/Month=1") :: | ||
| Row("Year=2015/Month=2") :: Nil) | ||
|
|
||
| checkAnswer( | ||
| sql("show partitions default.parquet_tab4 PARTITION(year=2015, month=1)"), | ||
| Row("year=2015/month=1") :: Nil) | ||
| sql("show partitions default.parquet_tab4 PARTITION(Year=2015, Month=1)"), | ||
| Row("Year=2015/Month=1") :: Nil) | ||
|
|
||
| checkAnswer( | ||
| sql("show partitions default.parquet_tab4 PARTITION(month=2)"), | ||
| Row("year=2015/month=2") :: | ||
| Row("year=2016/month=2") :: Nil) | ||
| sql("show partitions default.parquet_tab4 PARTITION(Month=2)"), | ||
| Row("Year=2015/Month=2") :: | ||
| Row("Year=2016/Month=2") :: Nil) | ||
| } | ||
|
|
||
| test("show partitions - empty row") { | ||
|
|
@@ -408,14 +411,18 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto | |
|
|
||
| test("show partitions - datasource") { | ||
| withTable("part_datasrc") { | ||
| val df = (1 to 3).map(i => (i, s"val_$i", i * 2)).toDF("a", "b", "c") | ||
| val df = (1 to 3).map(i => (i, s"val_$i", i * 2)).toDF("A", "b", "c") | ||
| df.write | ||
| .partitionBy("a") | ||
| .partitionBy("A") | ||
| .format("parquet") | ||
| .mode(SaveMode.Overwrite) | ||
| .saveAsTable("part_datasrc") | ||
|
|
||
| assert(sql("SHOW PARTITIONS part_datasrc").count() == 3) | ||
| checkAnswer( | ||
| sql("SHOW PARTITIONS part_datasrc"), | ||
| Row("A=1") :: | ||
| Row("A=2") :: | ||
|
||
| Row("A=3") :: Nil) | ||
| } | ||
| } | ||
| } | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These code changes are not related to this PR. How about submitting a separate PR for this purpose? These changes only cover Hive serde tables. We should create dedicated test cases if we do not already have them.