[SPARK-25993][SQL][TESTS] Add test cases for CREATE EXTERNAL TABLE with subdirectories #27130
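This patch adds test cases covering reads of external Parquet- and ORC-backed Hive tables whose LOCATION points at a directory tree with nested subdirectories, or at a wildcard path, under both settings of the metastore conversion flags (`spark.sql.hive.convertMetastoreParquet` / `spark.sql.hive.convertMetastoreOrc`).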
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.hive

 import java.io.File
 import java.io.IOException

 import org.apache.spark.sql.{Row, SaveMode}
 import org.apache.spark.sql.catalyst.catalog.HiveTableRelation
@@ -222,4 +223,115 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
      assert(df4.columns === Array("str", "max_int"))
    }
  }

  test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
    Seq("true", "false").foreach { parquetConversion =>
      withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) {
        withTempPath { path =>
          withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") {
            // Write one Parquet file at each of three nested levels:
            // <path>/l1, <path>/l1/l2 and <path>/l1/l2/l3.
            val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).
              toDF("c1", "c2", "c3").repartition(1)
            val someDF2 = Seq((3, 3, "parq3"), (4, 4, "parq4")).
              toDF("c1", "c2", "c3").repartition(1)
            val someDF3 = Seq((5, 5, "parq5"), (6, 6, "parq6")).
              toDF("c1", "c2", "c3").repartition(1)
            someDF1.write.parquet(s"${path.getCanonicalPath}/l1/")
            someDF2.write.parquet(s"${path.getCanonicalPath}/l1/l2/")
            someDF3.write.parquet(s"${path.getCanonicalPath}/l1/l2/l3/")

            // Table at the top directory, which contains no files itself,
            // only the l1 subdirectory.
            val topDirStatement =
              s"""
                 |CREATE EXTERNAL TABLE tbl1(
                 | c1 int,
                 | c2 int,
                 | c3 string)
                 |STORED AS parquet
                 |LOCATION '${path.getCanonicalPath}'""".stripMargin
            sql(topDirStatement)
            if (parquetConversion == "true") {
              checkAnswer(sql("select * from tbl1"), Nil)
            } else {
              intercept[IOException](sql("select * from tbl1").show())
            }

            // Table at l1: only the direct files are read.
            val l1DirStatement =
              s"""
                 |CREATE EXTERNAL TABLE tbl2(
                 | c1 int,
                 | c2 int,
                 | c3 string)
                 |STORED AS parquet
                 |LOCATION '${path.getCanonicalPath}/l1/'""".stripMargin
            sql(l1DirStatement)
            if (parquetConversion == "true") {
              checkAnswer(sql("select * from tbl2"),
                (1 to 2).map(i => Row(i, i, s"parq$i")))
            } else {
              intercept[IOException](sql("select * from tbl2").show())
            }

            // Table at l1/l2.
            val l2DirStatement =
              s"""
                 |CREATE EXTERNAL TABLE tbl3(
                 | c1 int,
                 | c2 int,
                 | c3 string)
                 |STORED AS parquet
                 |LOCATION '${path.getCanonicalPath}/l1/l2/'""".stripMargin
            sql(l2DirStatement)
            if (parquetConversion == "true") {
              checkAnswer(sql("select * from tbl3"),
                (3 to 4).map(i => Row(i, i, s"parq$i")))
            } else {
              intercept[IOException](sql("select * from tbl3").show())
            }

            // Wildcard at the top matches the l1 directory, whose direct
            // files are then read.
            val wildcardTopDirStatement =
              s"""
                 |CREATE EXTERNAL TABLE tbl4(
                 | c1 int,
                 | c2 int,
                 | c3 string)
                 |STORED AS parquet
                 |LOCATION '${new File(s"$path/*").toURI}'""".stripMargin
            sql(wildcardTopDirStatement)
            if (parquetConversion == "true") {
              checkAnswer(sql("select * from tbl4"),
                (1 to 2).map(i => Row(i, i, s"parq$i")))
            } else {
              intercept[IOException](sql("select * from tbl4").show())
            }

            // Wildcard at l1 matches l1's files and the l2 subdirectory.
            val wildcardL1DirStatement =
              s"""
                 |CREATE EXTERNAL TABLE tbl5(
                 | c1 int,
                 | c2 int,
                 | c3 string)
                 |STORED AS parquet
                 |LOCATION '${new File(s"$path/l1/*").toURI}'""".stripMargin
            sql(wildcardL1DirStatement)
            if (parquetConversion == "true") {
              checkAnswer(sql("select * from tbl5"),
                (1 to 4).map(i => Row(i, i, s"parq$i")))
            } else {
              intercept[IOException](sql("select * from tbl5").show())
            }

            // Wildcard at l1/l2 matches l2's files and the l3 subdirectory;
            // this succeeds for both values of the conversion flag.
            val wildcardL2DirStatement =
              s"""
                 |CREATE EXTERNAL TABLE tbl6(
                 | c1 int,
                 | c2 int,
                 | c3 string)
                 |STORED AS parquet
                 |LOCATION '${new File(s"$path/l1/l2/*").toURI}'""".stripMargin
            sql(wildcardL2DirStatement)
            checkAnswer(sql("select * from tbl6"),
              (3 to 6).map(i => Row(i, i, s"parq$i")))
          }
        }
      }
    }
  }
}
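Taken together, the assertions above pin down the Parquet behavior: with CONVERT_METASTORE_PARQUET enabled, Spark's native reader lists only the files directly under LOCATION (a location containing only subdirectories reads as empty), while the non-converted Hive SerDe path throws an IOException when it encounters a subdirectory. A minimal sketch of the converted path, assuming a Hive-enabled SparkSession named `spark` and the layout written by the test; the base path and the table name `demo` are hypothetical:

```scala
// Sketch only: mirrors the assertions above, not part of the patch.
// Assumed layout, as created by the test:
//   /tmp/base/l1/*.parquet        -> (1, 1, "parq1"), (2, 2, "parq2")
//   /tmp/base/l1/l2/*.parquet     -> (3, 3, "parq3"), (4, 4, "parq4")
//   /tmp/base/l1/l2/l3/*.parquet  -> (5, 5, "parq5"), (6, 6, "parq6")
spark.conf.set("spark.sql.hive.convertMetastoreParquet", "true")
spark.sql(
  """CREATE EXTERNAL TABLE demo(c1 int, c2 int, c3 string)
    |STORED AS parquet
    |LOCATION '/tmp/base/l1'""".stripMargin)
// Converted (native) read lists only l1's direct files: two rows.
spark.sql("select * from demo").show()
// With spark.sql.hive.convertMetastoreParquet=false, the same query goes
// through the Hive SerDe path and fails with java.io.IOException on the
// nested l2 directory.
```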
@@ -170,4 +170,155 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
  test("SPARK-11412 read and merge orc schemas in parallel") {
    testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel)
  }

  test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
    Seq(true, false).foreach { convertMetastore =>
      withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") {
        withTempDir { dir =>
          try {
            // Use Hive itself to write ORC files at three nested levels by
            // moving the helper table's location before each insert.
            hiveClient.runSqlHive("USE default")
            hiveClient.runSqlHive(
              """
                |CREATE EXTERNAL TABLE hive_orc(
                | C1 INT,
                | C2 INT,
                | C3 STRING)
                |STORED AS orc""".stripMargin)
            // Hive throws an exception if the location is assigned in the
            // CREATE TABLE statement, so set it via ALTER TABLE instead.
            hiveClient.runSqlHive(
              "ALTER TABLE hive_orc SET LOCATION " +
                s"'${new File(s"${dir.getCanonicalPath}/l1/").toURI}'")
            hiveClient.runSqlHive(
              """
                |INSERT INTO TABLE hive_orc
                |VALUES (1, 1, 'orc1'), (2, 2, 'orc2')""".stripMargin)

            hiveClient.runSqlHive(
              "ALTER TABLE hive_orc SET LOCATION " +
                s"'${new File(s"${dir.getCanonicalPath}/l1/l2/").toURI}'")
            hiveClient.runSqlHive(
              """
                |INSERT INTO TABLE hive_orc
                |VALUES (3, 3, 'orc3'), (4, 4, 'orc4')""".stripMargin)

            hiveClient.runSqlHive(
              "ALTER TABLE hive_orc SET LOCATION " +
                s"'${new File(s"${dir.getCanonicalPath}/l1/l2/l3/").toURI}'")
            hiveClient.runSqlHive(
              """
                |INSERT INTO TABLE hive_orc
                |VALUES (5, 5, 'orc5'), (6, 6, 'orc6')""".stripMargin)

            withTable("tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") {
              // Table at the top directory: empty when converted, all six
              // rows through the Hive reader, which lists recursively.
              val topDirStatement =
                s"""
                   |CREATE EXTERNAL TABLE tbl1(
                   | c1 int,
                   | c2 int,
                   | c3 string)
                   |STORED AS orc
                   |LOCATION '${dir.getCanonicalPath}'""".stripMargin
              sql(topDirStatement)
              val topDirSqlStatement = "select * from tbl1"
              if (convertMetastore) {
                checkAnswer(sql(topDirSqlStatement), Nil)
              } else {
                checkAnswer(sql(topDirSqlStatement),
                  (1 to 6).map(i => Row(i, i, s"orc$i")))
              }

              val l1DirStatement =
                s"""
                   |CREATE EXTERNAL TABLE tbl2(
                   | c1 int,
                   | c2 int,
                   | c3 string)
                   |STORED AS orc
                   |LOCATION '${dir.getCanonicalPath}/l1/'""".stripMargin
              sql(l1DirStatement)
              val l1DirSqlStatement = "select * from tbl2"
              if (convertMetastore) {
                checkAnswer(sql(l1DirSqlStatement),
                  (1 to 2).map(i => Row(i, i, s"orc$i")))
              } else {
                checkAnswer(sql(l1DirSqlStatement),
                  (1 to 6).map(i => Row(i, i, s"orc$i")))
              }

              val l2DirStatement =
                s"""
                   |CREATE EXTERNAL TABLE tbl3(
                   | c1 int,
                   | c2 int,
                   | c3 string)
                   |STORED AS orc
                   |LOCATION '${dir.getCanonicalPath}/l1/l2/'""".stripMargin
              sql(l2DirStatement)
              val l2DirSqlStatement = "select * from tbl3"
              if (convertMetastore) {
                checkAnswer(sql(l2DirSqlStatement),
                  (3 to 4).map(i => Row(i, i, s"orc$i")))
              } else {
                checkAnswer(sql(l2DirSqlStatement),
                  (3 to 6).map(i => Row(i, i, s"orc$i")))
              }

              // Wildcard locations resolve only on the converted (native)
              // path; the Hive reader returns no rows for them.
              val wildcardTopDirStatement =
                s"""
                   |CREATE EXTERNAL TABLE tbl4(
                   | c1 int,
                   | c2 int,
                   | c3 string)
                   |STORED AS orc
                   |LOCATION '${new File(s"$dir/*").toURI}'""".stripMargin
              sql(wildcardTopDirStatement)
              val wildcardTopDirSqlStatement = "select * from tbl4"
              if (convertMetastore) {
                checkAnswer(sql(wildcardTopDirSqlStatement),
                  (1 to 2).map(i => Row(i, i, s"orc$i")))
              } else {
                checkAnswer(sql(wildcardTopDirSqlStatement), Nil)
              }

              val wildcardL1DirStatement =
                s"""
                   |CREATE EXTERNAL TABLE tbl5(
                   | c1 int,
                   | c2 int,
                   | c3 string)
                   |STORED AS orc
                   |LOCATION '${new File(s"$dir/l1/*").toURI}'""".stripMargin
              sql(wildcardL1DirStatement)
              val wildcardL1DirSqlStatement = "select * from tbl5"
              if (convertMetastore) {
                checkAnswer(sql(wildcardL1DirSqlStatement),
                  (1 to 4).map(i => Row(i, i, s"orc$i")))
              } else {
                checkAnswer(sql(wildcardL1DirSqlStatement), Nil)
              }

              val wildcardL2Statement =
                s"""
                   |CREATE EXTERNAL TABLE tbl6(
                   | c1 int,
                   | c2 int,
                   | c3 string)
                   |STORED AS orc
                   |LOCATION '${new File(s"$dir/l1/l2/*").toURI}'""".stripMargin
              sql(wildcardL2Statement)
              val wildcardL2SqlStatement = "select * from tbl6"
              if (convertMetastore) {
                checkAnswer(sql(wildcardL2SqlStatement),
                  (3 to 6).map(i => Row(i, i, s"orc$i")))
              } else {
                checkAnswer(sql(wildcardL2SqlStatement), Nil)
              }
            }
          } finally {
            hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc")
          }
        }
      }
    }
  }
}
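Unlike the Parquet case, the non-converted ORC path does not fail on subdirectories: the Hive reader lists them recursively, while wildcard locations only yield rows on the converted path. A condensed view of what the checkAnswer calls above assert, per LOCATION (derived directly from the test, with `<dir>` standing for the temp directory):

| LOCATION | convertMetastoreOrc=true | convertMetastoreOrc=false |
|---|---|---|
| `<dir>` | no rows | rows 1-6 |
| `<dir>/l1` | rows 1-2 | rows 1-6 |
| `<dir>/l1/l2` | rows 3-4 | rows 3-6 |
| `<dir>/*` | rows 1-2 | no rows |
| `<dir>/l1/*` | rows 1-4 | no rows |
| `<dir>/l1/l2/*` | rows 3-6 | no rows |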
Review comment: Shall we capitalize the SQL statements, like `SELECT * FROM tbl1`?