-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-25993][SQL][TEST] Add test cases for CREATE EXTERNAL TABLE with subdirectories #23108
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
4e45ef9
e238764
e731746
d6e582b
39ebcf8
d75b923
fe472c8
51d1d78
d851169
fef8c68
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,7 @@ import org.apache.spark.util.Utils | |
| class HiveParquetSourceSuite extends ParquetPartitioningTest { | ||
| import testImplicits._ | ||
| import spark._ | ||
| import java.io.IOException | ||
|
|
||
| override def beforeAll(): Unit = { | ||
| super.beforeAll() | ||
|
|
@@ -222,4 +223,66 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { | |
| assert(df4.columns === Array("str", "max_int")) | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please address the first and second review comments first.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @dongjoon-hyun Thanks for the comments. I have tried to make the changes for the first and second review comments: I changed both suites so that they look similar, and also added more test cases. For the third comment, I haven't found a common place shared by both suites; when you say a helper function is missing in the previous commit, could you point out which helper function I missed? Thanks. |
||
| withTempPath { path => | ||
| withTable("tbl1", "tbl2", "tbl3") { | ||
| val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")). | ||
| toDF("c1", "c2", "c3").repartition(1) | ||
| val dataDir = s"${path.getCanonicalPath}/l3/l2/l1/" | ||
| val parentDir = s"${path.getCanonicalPath}/l3/l2/" | ||
| val wildcardParentDir = new File(s"${path}/l3/l2/*").toURI | ||
| val wildcardL3Dir = new File(s"${path}/l3/*").toURI | ||
| someDF1.write.parquet(dataDir) | ||
| val parentDirStatement = | ||
| s""" | ||
| |CREATE EXTERNAL TABLE tbl1( | ||
| | c1 int, | ||
| | c2 int, | ||
| | c3 string) | ||
| |STORED AS parquet | ||
| |LOCATION '${parentDir}'""".stripMargin | ||
| sql(parentDirStatement) | ||
| val wildcardStatement = | ||
| s""" | ||
| |CREATE EXTERNAL TABLE tbl2( | ||
| | c1 int, | ||
| | c2 int, | ||
| | c3 string) | ||
| |STORED AS parquet | ||
| |LOCATION '${wildcardParentDir}'""".stripMargin | ||
| sql(wildcardStatement) | ||
| val wildcardL3Statement = | ||
| s""" | ||
| |CREATE EXTERNAL TABLE tbl3( | ||
| | c1 int, | ||
| | c2 int, | ||
| | c3 string) | ||
| |STORED AS parquet | ||
| |LOCATION '${wildcardL3Dir}'""".stripMargin | ||
| sql(wildcardL3Statement) | ||
|
|
||
| Seq("true", "false").foreach { parquetConversion => | ||
| withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { | ||
| if (parquetConversion == "true") { | ||
| checkAnswer(sql("select * from tbl1"), Nil) | ||
| checkAnswer(sql("select * from tbl2"), | ||
| (1 to 2).map(i => Row(i, i, s"parq$i"))) | ||
| checkAnswer(sql("select * from tbl3"), Nil) | ||
| } else { | ||
| Seq("select * from tbl1", "select * from tbl2", "select * from tbl3").foreach { | ||
| sqlStmt => | ||
| try { | ||
| sql(sqlStmt) | ||
| } catch { | ||
| case e: IOException => | ||
| assert(e.getMessage().contains("java.io.IOException: Not a file")) | ||
| } | ||
|
||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -73,9 +73,11 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { | |
| sql( | ||
| s""" | ||
| |CREATE TABLE $tableName | ||
|
|
||
| |USING org.apache.spark.sql.hive.orc | ||
| |OPTIONS ( | ||
| | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' | ||
| | PATH '${new File(orcTableAsDir.getAbsolutePath | ||
| ).toURI}' | ||
|
||
| |) | ||
| """.stripMargin) | ||
|
|
||
|
|
@@ -191,10 +193,101 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { | |
| } | ||
| } | ||
|
|
||
| test("SPARK-25993 Add test cases for resolution of ORC table location") { | ||
| test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { | ||
| Seq(true, false).foreach { convertMetastore => | ||
| withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { | ||
| testORCTableLocation(convertMetastore) | ||
| withTempDir { dir => | ||
| val dataDir = new File(s"${dir.getCanonicalPath}/l3/l2/l1/").toURI | ||
| val parentDir = s"${dir.getCanonicalPath}/l3/l2/" | ||
| val l3Dir = s"${dir.getCanonicalPath}/l3/" | ||
| val wildcardParentDir = new File(s"${dir}/l3/l2/*").toURI | ||
| val wildcardL3Dir = new File(s"${dir}/l3/*").toURI | ||
|
|
||
| try { | ||
| hiveClient.runSqlHive("USE default") | ||
| hiveClient.runSqlHive( | ||
| """ | ||
| |CREATE EXTERNAL TABLE hive_orc( | ||
| | C1 INT, | ||
| | C2 INT, | ||
| | C3 STRING) | ||
| |STORED AS orc""".stripMargin) | ||
| // Hive throws an exception if I assign the location in the create table statement. | ||
| hiveClient.runSqlHive( | ||
| s"ALTER TABLE hive_orc SET LOCATION '$dataDir'") | ||
| hiveClient.runSqlHive( | ||
| """ | ||
| |INSERT INTO TABLE hive_orc | ||
| |VALUES (1, 1, 'orc1'), (2, 2, 'orc2')""".stripMargin) | ||
|
|
||
| withTable("tbl1", "tbl2", "tbl3", "tbl4") { | ||
| val parentDirStatement = | ||
| s""" | ||
| |CREATE EXTERNAL TABLE tbl1( | ||
| | c1 int, | ||
| | c2 int, | ||
| | c3 string) | ||
| |STORED AS orc | ||
| |LOCATION '${parentDir}'""".stripMargin | ||
| sql(parentDirStatement) | ||
| val parentDirSqlStatement = s"select * from tbl1" | ||
| if (convertMetastore) { | ||
| checkAnswer(sql(parentDirSqlStatement), Nil) | ||
| } else { | ||
| checkAnswer(sql(parentDirSqlStatement), | ||
| (1 to 2).map(i => Row(i, i, s"orc$i"))) | ||
| } | ||
|
|
||
| val l3DirStatement = | ||
| s""" | ||
| |CREATE EXTERNAL TABLE tbl2( | ||
| | c1 int, | ||
| | c2 int, | ||
| | c3 string) | ||
| |STORED AS orc | ||
| |LOCATION '${l3Dir}'""".stripMargin | ||
| sql(l3DirStatement) | ||
| val l3DirSqlStatement = s"select * from tbl2" | ||
| if (convertMetastore) { | ||
| checkAnswer(sql(l3DirSqlStatement), Nil) | ||
| } else { | ||
| checkAnswer(sql(l3DirSqlStatement), | ||
| (1 to 2).map(i => Row(i, i, s"orc$i"))) | ||
| } | ||
|
|
||
| val wildcardStatement = | ||
| s""" | ||
| |CREATE EXTERNAL TABLE tbl3( | ||
| | c1 int, | ||
| | c2 int, | ||
| | c3 string) | ||
| |STORED AS orc | ||
| |LOCATION '$wildcardParentDir'""".stripMargin | ||
| sql(wildcardStatement) | ||
| val wildcardSqlStatement = s"select * from tbl3" | ||
| if (convertMetastore) { | ||
| checkAnswer(sql(wildcardSqlStatement), | ||
| (1 to 2).map(i => Row(i, i, s"orc$i"))) | ||
| } else { | ||
| checkAnswer(sql(wildcardSqlStatement), Nil) | ||
| } | ||
|
|
||
| val wildcardL3Statement = | ||
| s""" | ||
| |CREATE EXTERNAL TABLE tbl4( | ||
| | c1 int, | ||
| | c2 int, | ||
| | c3 string) | ||
| |STORED AS orc | ||
| |LOCATION '$wildcardL3Dir'""".stripMargin | ||
| sql(wildcardL3Statement) | ||
| val wildcardL3SqlStatement = s"select * from tbl4" | ||
| checkAnswer(sql(wildcardL3SqlStatement), Nil) | ||
| } | ||
| } finally { | ||
| hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems that we need to clean up the `hive_orc` table created via `hiveClient`.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @dongjoon-hyun at line 221, I put the `DROP TABLE IF EXISTS hive_orc` cleanup in the `finally` block.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Got it. I missed that. |
||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This `import java.io.IOException` had better go to line 20, alongside the other imports at the top of the file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok