Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
move the test cases
  • Loading branch information
kevinyu98 committed Dec 5, 2018
commit 51d1d78d1e1c4f56f5f07dc18bc9fcbe9a00fbbf
Original file line number Diff line number Diff line change
Expand Up @@ -186,82 +186,6 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
}
}

/**
 * Writes a two-row ORC data set into `<tempDir>/l3/l2/l1/` and then creates four external
 * ORC tables whose LOCATION is an ancestor of that directory (plain or wildcard path),
 * asserting the rows each table returns.
 *
 * @param isConvertMetastore selects which set of expected answers is asserted: when `true`
 *                           only the `l3/l2/\*` wildcard table is expected to return the rows;
 *                           when `false` only the plain `l3/l2/` and `l3/` tables are.
 */
protected def testORCTableLocation(isConvertMetastore: Boolean): Unit = {
  withTempDir { dir =>
    val sourceDF = Seq((1, 1, "orc1"), (2, 2, "orc2")).toDF("c1", "c2", "c3").repartition(1)
    withTable("tbl1", "tbl2", "tbl3", "tbl4") {
      val canonicalRoot = dir.getCanonicalPath
      val leafLocation = s"$canonicalRoot/l3/l2/l1/"
      val l2Location = s"$canonicalRoot/l3/l2/"
      val l3Location = s"$canonicalRoot/l3/"
      // The wildcard locations are built from the File's own string form, as URIs.
      val l2Wildcard = new File(s"${dir}/l3/l2/*").toURI
      val l3Wildcard = new File(s"${dir}/l3/*").toURI
      sourceDF.write.orc(leafLocation)

      val writtenRows = (1 to 2).map(i => Row(i, i, s"orc$i"))
      val noRows = Nil

      // Creates one external ORC table at `location`, then checks a full scan of it.
      def createAndVerify(table: String, location: String, expected: Seq[Row]): Unit = {
        sql(
          s"""
             |CREATE EXTERNAL TABLE $table(
             | c1 int,
             | c2 int,
             | c3 string)
             |STORED AS orc
             |LOCATION '$location'""".stripMargin)
        checkAnswer(sql(s"select * from $table"), expected)
      }

      // Plain ancestor directories: rows are expected only when isConvertMetastore is false.
      val plainDirExpectation = if (isConvertMetastore) noRows else writtenRows
      createAndVerify("tbl1", l2Location, plainDirExpectation)
      createAndVerify("tbl2", l3Location, plainDirExpectation)

      // Wildcard one level above the data: rows are expected only when isConvertMetastore
      // is true.
      val wildcardExpectation = if (isConvertMetastore) writtenRows else noRows
      createAndVerify("tbl3", l2Wildcard.toString, wildcardExpectation)

      // Wildcard two levels above the data: no rows are expected in either mode.
      createAndVerify("tbl4", l3Wildcard.toString, noRows)
    }
  }
}

test("create temporary orc table") {
checkAnswer(sql("SELECT COUNT(*) FROM normal_orc_source"), Row(10))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import org.apache.spark.util.Utils
class HiveParquetSourceSuite extends ParquetPartitioningTest {
import testImplicits._
import spark._
import java.io.IOException
Copy link
Member

@dongjoon-hyun dongjoon-hyun Dec 6, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This had better go to line 20.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok


override def beforeAll(): Unit = {
super.beforeAll()
Expand Down Expand Up @@ -222,4 +223,66 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest {
assert(df4.columns === Array("str", "max_int"))
}
}

test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please fix this first for the first and second review comments.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dongjoon-hyun Thanks for the comments. I have tried to make the changes for the first and second review comments: I changed both suites to make them look similar, and also added more test cases. For the third comment, I haven't found a common place for both suites. When you say a helper function was missing in the previous commit, can you help point out which helper function I missed? Thanks.

withTempPath { path =>
withTable("tbl1", "tbl2", "tbl3") {
val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).
toDF("c1", "c2", "c3").repartition(1)
val dataDir = s"${path.getCanonicalPath}/l3/l2/l1/"
val parentDir = s"${path.getCanonicalPath}/l3/l2/"
val wildcardParentDir = new File(s"${path}/l3/l2/*").toURI
val wildcardL3Dir = new File(s"${path}/l3/*").toURI
someDF1.write.parquet(dataDir)
val parentDirStatement =
s"""
|CREATE EXTERNAL TABLE tbl1(
| c1 int,
| c2 int,
| c3 string)
|STORED AS parquet
|LOCATION '${parentDir}'""".stripMargin
sql(parentDirStatement)
val wildcardStatement =
s"""
|CREATE EXTERNAL TABLE tbl2(
| c1 int,
| c2 int,
| c3 string)
|STORED AS parquet
|LOCATION '${wildcardParentDir}'""".stripMargin
sql(wildcardStatement)
val wildcardL3Statement =
s"""
|CREATE EXTERNAL TABLE tbl3(
| c1 int,
| c2 int,
| c3 string)
|STORED AS parquet
|LOCATION '${wildcardL3Dir}'""".stripMargin
sql(wildcardL3Statement)

Seq("true", "false").foreach { parquetConversion =>
withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) {
if (parquetConversion == "true") {
checkAnswer(sql("select * from tbl1"), Nil)
checkAnswer(sql("select * from tbl2"),
(1 to 2).map(i => Row(i, i, s"parq$i")))
checkAnswer(sql("select * from tbl3"), Nil)
} else {
Seq("select * from tbl1", "select * from tbl2", "select * from tbl3").foreach {
sqlStmt =>
try {
sql(sqlStmt)
} catch {
case e: IOException =>
assert(e.getMessage().contains("java.io.IOException: Not a file"))
}
Copy link
Member

@dongjoon-hyun dongjoon-hyun Dec 6, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kevinyu98 . Is this testing exceptions for all three of the SQL statements above? We use intercept[IOException] to test expected Exceptions.

For now, this does not look like a robust test case, because there is no assert(false) after sql(sqlStmt). We need to check each individual query's failure or success exactly for the specific configuration.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you are right, I will make changes. Thanks.

}
}
}
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2370,51 +2370,4 @@ class HiveDDLSuite
))
}
}

// Writes a two-row Parquet data set into `<tempPath>/l3/l2/l1/` and checks which rows
// external Parquet tables return when their LOCATION is the parent directory or a wildcard
// path above the data directory.
test("SPARK-25993 Add test cases for resolution of Parquet table location") {
  withTempPath { path =>
    val someDF1 = Seq((1, 1, "parq1"), (2, 2, "parq2")).toDF("c1", "c2", "c3").repartition(1)
    withTable("tbl1", "tbl2", "tbl3") {
      val dataDir = s"${path.getCanonicalPath}/l3/l2/l1/"
      val parentDir = s"${path.getCanonicalPath}/l3/l2/"
      // Wildcard locations are passed to LOCATION as URIs.
      val wildcardParentDir = new File(s"${path}/l3/l2/*").toURI
      val wildcardL3Dir = new File(s"${path}/l3/*").toURI
      someDF1.write.parquet(dataDir)

      // tbl1: LOCATION is the direct parent of the data directory; no rows are expected.
      val parentDirStatement =
        s"""
           |CREATE EXTERNAL TABLE tbl1(
           | c1 int,
           | c2 int,
           | c3 string)
           |STORED AS parquet
           |LOCATION '${parentDir}'""".stripMargin
      sql(parentDirStatement)
      checkAnswer(sql("select * from tbl1"), Nil)

      // tbl2: LOCATION is the wildcard `l3/l2/*`; both written rows are expected.
      val wildcardStatement =
        s"""
           |CREATE EXTERNAL TABLE tbl2(
           | c1 int,
           | c2 int,
           | c3 string)
           |STORED AS parquet
           |LOCATION '${wildcardParentDir}'""".stripMargin
      sql(wildcardStatement)
      checkAnswer(sql("select * from tbl2"),
        (1 to 2).map(i => Row(i, i, s"parq$i")))

      // tbl3: LOCATION is the wildcard `l3/*`, two levels above the data; no rows are
      // expected.
      val wildcardL3Statement =
        s"""
           |CREATE EXTERNAL TABLE tbl3(
           | c1 int,
           | c2 int,
           | c3 string)
           |STORED AS parquet
           |LOCATION '${wildcardL3Dir}'""".stripMargin
      sql(wildcardL3Statement)
      checkAnswer(sql("select * from tbl3"), Nil)
    }
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,11 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
sql(
s"""
|CREATE TABLE $tableName

|USING org.apache.spark.sql.hive.orc
|OPTIONS (
| PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}'
| PATH '${new File(orcTableAsDir.getAbsolutePath
).toURI}'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The above change in line 76 ~ 80 looks strange and irrelevant. Let's revert this change.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, I didn't notice that my changes affected the formatting in the file. I have reverted the change. Thanks.

|)
""".stripMargin)

Expand Down Expand Up @@ -191,10 +193,101 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
}
}

test("SPARK-25993 Add test cases for resolution of ORC table location") {
test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
Seq(true, false).foreach { convertMetastore =>
withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") {
testORCTableLocation(convertMetastore)
withTempDir { dir =>
val dataDir = new File(s"${dir.getCanonicalPath}/l3/l2/l1/").toURI
val parentDir = s"${dir.getCanonicalPath}/l3/l2/"
val l3Dir = s"${dir.getCanonicalPath}/l3/"
val wildcardParentDir = new File(s"${dir}/l3/l2/*").toURI
val wildcardL3Dir = new File(s"${dir}/l3/*").toURI

try {
hiveClient.runSqlHive("USE default")
hiveClient.runSqlHive(
"""
|CREATE EXTERNAL TABLE hive_orc(
| C1 INT,
| C2 INT,
| C3 STRING)
|STORED AS orc""".stripMargin)
// Hive throws an exception if I assign the location in the create table statement.
hiveClient.runSqlHive(
s"ALTER TABLE hive_orc SET LOCATION '$dataDir'")
hiveClient.runSqlHive(
"""
|INSERT INTO TABLE hive_orc
|VALUES (1, 1, 'orc1'), (2, 2, 'orc2')""".stripMargin)

withTable("tbl1", "tbl2", "tbl3", "tbl4") {
val parentDirStatement =
s"""
|CREATE EXTERNAL TABLE tbl1(
| c1 int,
| c2 int,
| c3 string)
|STORED AS orc
|LOCATION '${parentDir}'""".stripMargin
sql(parentDirStatement)
val parentDirSqlStatement = s"select * from tbl1"
if (convertMetastore) {
checkAnswer(sql(parentDirSqlStatement), Nil)
} else {
checkAnswer(sql(parentDirSqlStatement),
(1 to 2).map(i => Row(i, i, s"orc$i")))
}

val l3DirStatement =
s"""
|CREATE EXTERNAL TABLE tbl2(
| c1 int,
| c2 int,
| c3 string)
|STORED AS orc
|LOCATION '${l3Dir}'""".stripMargin
sql(l3DirStatement)
val l3DirSqlStatement = s"select * from tbl2"
if (convertMetastore) {
checkAnswer(sql(l3DirSqlStatement), Nil)
} else {
checkAnswer(sql(l3DirSqlStatement),
(1 to 2).map(i => Row(i, i, s"orc$i")))
}

val wildcardStatement =
s"""
|CREATE EXTERNAL TABLE tbl3(
| c1 int,
| c2 int,
| c3 string)
|STORED AS orc
|LOCATION '$wildcardParentDir'""".stripMargin
sql(wildcardStatement)
val wildcardSqlStatement = s"select * from tbl3"
if (convertMetastore) {
checkAnswer(sql(wildcardSqlStatement),
(1 to 2).map(i => Row(i, i, s"orc$i")))
} else {
checkAnswer(sql(wildcardSqlStatement), Nil)
}

val wildcardL3Statement =
s"""
|CREATE EXTERNAL TABLE tbl4(
| c1 int,
| c2 int,
| c3 string)
|STORED AS orc
|LOCATION '$wildcardL3Dir'""".stripMargin
sql(wildcardL3Statement)
val wildcardL3SqlStatement = s"select * from tbl4"
checkAnswer(sql(wildcardL3SqlStatement), Nil)
}
} finally {
hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that we need to clean up tbl1 ~ tbl4, too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dongjoon-hyun At line 221, I put tbl1 ~ tbl4 inside withTable, so I think they will get dropped. I tried running it a couple of times in IntelliJ, and it seems to work fine. What do you think?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. I missed that.

}
}
}
}
}
Expand Down