From 09c621bae172562b9dba0da91a527465d0ad7702 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 28 Sep 2017 10:23:52 -0700 Subject: [PATCH 1/5] [SPARK-22158][SQL] convertMetastoreOrc should not ignore table properties --- .../spark/sql/hive/HiveStrategies.scala | 2 +- .../sql/hive/execution/HiveDDLSuite.scala | 73 ++++++++++--------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 805b3171cdaa..31a7fac0c422 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -194,7 +194,7 @@ case class RelationConversions( sessionCatalog.metastoreCatalog .convertToLogicalRelation(relation, options, classOf[ParquetFileFormat], "parquet") } else { - val options = Map[String, String]() + val options = relation.tableMeta.storage.properties sessionCatalog.metastoreCatalog .convertToLogicalRelation(relation, options, classOf[OrcFileFormat], "orc") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 668da5fb4732..feaeb06c02db 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils} -import org.apache.spark.sql.hive.HiveExternalCatalog +import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} import org.apache.spark.sql.hive.orc.OrcFileOperator import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -1438,39 +1438,44 @@ class HiveDDLSuite } test("create hive serde table with new syntax") { - withTable("t", "t2", "t3") { - withTempPath { path => - sql( - s""" - |CREATE TABLE t(id int) USING hive - |OPTIONS(fileFormat 'orc', compression 'Zlib') - |LOCATION '${path.toURI}' - """.stripMargin) - val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) - assert(DDLUtils.isHiveTable(table)) - assert(table.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) - assert(table.storage.properties.get("compression") == Some("Zlib")) - assert(spark.table("t").collect().isEmpty) - - sql("INSERT INTO t SELECT 1") - checkAnswer(spark.table("t"), Row(1)) - // Check if this is compressed as ZLIB. 
- val maybeOrcFile = path.listFiles().find(!_.getName.endsWith(".crc")) - assert(maybeOrcFile.isDefined) - val orcFilePath = maybeOrcFile.get.toPath.toString - val expectedCompressionKind = - OrcFileOperator.getFileReader(orcFilePath).get.getCompression - assert("ZLIB" === expectedCompressionKind.name()) - - sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") - val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) - assert(DDLUtils.isHiveTable(table2)) - assert(table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - checkAnswer(spark.table("t2"), Row(1, "a")) - - sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)") - sql("INSERT INTO t3 PARTITION(p=1) SELECT 0") - checkAnswer(spark.table("t3"), Row(0, 1)) + Seq("true", "false").foreach { value => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) { + withTable("t", "t2", "t3") { + withTempPath { path => + sql( + s""" + |CREATE TABLE t(id int) USING hive + |OPTIONS(fileFormat 'orc', compression 'Zlib') + |LOCATION '${path.toURI}' + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(DDLUtils.isHiveTable(table)) + assert(table.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + assert(table.storage.properties.get("compression") == Some("Zlib")) + assert(spark.table("t").collect().isEmpty) + + sql("INSERT INTO t SELECT 1") + checkAnswer(spark.table("t"), Row(1)) + // Check if this is compressed as ZLIB. + val maybeOrcFile = path.listFiles().find(_.getName.startsWith("part")) + assert(maybeOrcFile.isDefined) + val orcFilePath = maybeOrcFile.get.toPath.toString + val expectedCompressionKind = + OrcFileOperator.getFileReader(orcFilePath).get.getCompression + assert("ZLIB" === expectedCompressionKind.name()) + + sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") + val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) + assert(DDLUtils.isHiveTable(table2)) + assert( + table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + checkAnswer(spark.table("t2"), Row(1, "a")) + + sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)") + sql("INSERT INTO t3 PARTITION(p=1) SELECT 0") + checkAnswer(spark.table("t3"), Row(0, 1)) + } + } } } } From 144868eeda4e02c92bb048d9c46625dea28aa3cb Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 29 Sep 2017 10:06:54 -0700 Subject: [PATCH 2/5] fix parquet, too. 
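The Parquet conversion path in RelationConversions has the same problem as ORC:
convert() built its options from the schema-merging flag alone, so the Hive
table's storage properties (for example a user-specified 'compression') never
reached ParquetFileFormat. A minimal repro sketch, assuming
spark.sql.hive.convertMetastoreParquet is enabled (the default); the table name
and location below are illustrative only:

    // Create a Hive Parquet table that asks for gzip compression.
    spark.sql(
      """
        |CREATE TABLE t(id int) USING hive
        |OPTIONS(fileFormat 'parquet', compression 'gzip')
        |LOCATION '/tmp/spark_22158_parquet'
      """.stripMargin)
    spark.sql("INSERT INTO t SELECT 1")
    // Without this patch the written part files use the session's default
    // Parquet codec rather than gzip, because the table's 'compression'
    // property is dropped when the relation is converted to a data source
    // relation.

This patch propagates relation.tableMeta.storage.properties into the options
map (MERGE_SCHEMA still takes its value from the session configuration) and
adds a Parquet variant of the new-syntax DDL test that checks the compression
codec recorded in the written file's footer.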
--- .../spark/sql/hive/HiveStrategies.scala | 2 +- .../sql/hive/execution/HiveDDLSuite.scala | 44 ++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 31a7fac0c422..3592b8f4846d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -189,7 +189,7 @@ case class RelationConversions( private def convert(relation: HiveTableRelation): LogicalRelation = { val serde = relation.tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) if (serde.contains("parquet")) { - val options = Map(ParquetOptions.MERGE_SCHEMA -> + val options = relation.tableMeta.storage.properties + (ParquetOptions.MERGE_SCHEMA -> conf.getConf(HiveUtils.CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING).toString) sessionCatalog.metastoreCatalog .convertToLogicalRelation(relation, options, classOf[ParquetFileFormat], "parquet") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index feaeb06c02db..a7edf10974e2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -23,6 +23,8 @@ import java.net.URI import scala.language.existentials import org.apache.hadoop.fs.Path +import org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER +import org.apache.parquet.hadoop.ParquetFileReader import org.scalatest.BeforeAndAfterEach import org.apache.spark.SparkException @@ -1437,7 +1439,47 @@ class HiveDDLSuite } } - test("create hive serde table with new syntax") { + test("create hive serde table with new syntax - parquet") { + withTable("t", "t2", "t3") { + withTempPath { path => + sql( + s""" + |CREATE TABLE t(id int) USING hive + |OPTIONS(fileFormat 'parquet', compression 'gzip') + |LOCATION '${path.toURI}' + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(DDLUtils.isHiveTable(table)) + assert(table.storage.serde == + Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + assert(table.storage.properties.get("compression") == Some("gzip")) + assert(spark.table("t").collect().isEmpty) + + sql("INSERT INTO t SELECT 1") + checkAnswer(spark.table("t"), Row(1)) + val maybeParquetFile = path.listFiles().find(f => f.getName.startsWith("part")) + assert(maybeParquetFile.isDefined) + + val footer = ParquetFileReader.readFooter( + sparkContext.hadoopConfiguration, + new Path(maybeParquetFile.get.getPath), + NO_FILTER) + assert("GZIP" === footer.getBlocks.get(0).getColumns().get(0).getCodec.toString) + + sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") + val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) + assert(DDLUtils.isHiveTable(table2)) + assert(table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + checkAnswer(spark.table("t2"), Row(1, "a")) + + sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)") + sql("INSERT INTO t3 PARTITION(p=1) SELECT 0") + checkAnswer(spark.table("t3"), Row(0, 1)) + } + } + } + + test("create hive serde table with new syntax - orc") { Seq("true", "false").foreach { value => 
withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) { withTable("t", "t2", "t3") { From d218c98d7af75ffc13e9900a9370885f83f55dc6 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 29 Sep 2017 10:09:25 -0700 Subject: [PATCH 3/5] move test case position for review. --- .../sql/hive/execution/HiveDDLSuite.scala | 80 +++++++++---------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index a7edf10974e2..f85750ce034a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -1439,46 +1439,6 @@ class HiveDDLSuite } } - test("create hive serde table with new syntax - parquet") { - withTable("t", "t2", "t3") { - withTempPath { path => - sql( - s""" - |CREATE TABLE t(id int) USING hive - |OPTIONS(fileFormat 'parquet', compression 'gzip') - |LOCATION '${path.toURI}' - """.stripMargin) - val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) - assert(DDLUtils.isHiveTable(table)) - assert(table.storage.serde == - Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) - assert(table.storage.properties.get("compression") == Some("gzip")) - assert(spark.table("t").collect().isEmpty) - - sql("INSERT INTO t SELECT 1") - checkAnswer(spark.table("t"), Row(1)) - val maybeParquetFile = path.listFiles().find(f => f.getName.startsWith("part")) - assert(maybeParquetFile.isDefined) - - val footer = ParquetFileReader.readFooter( - sparkContext.hadoopConfiguration, - new Path(maybeParquetFile.get.getPath), - NO_FILTER) - assert("GZIP" === footer.getBlocks.get(0).getColumns().get(0).getCodec.toString) - - sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") - val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) - assert(DDLUtils.isHiveTable(table2)) - assert(table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - checkAnswer(spark.table("t2"), Row(1, "a")) - - sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)") - sql("INSERT INTO t3 PARTITION(p=1) SELECT 0") - checkAnswer(spark.table("t3"), Row(0, 1)) - } - } - } - test("create hive serde table with new syntax - orc") { Seq("true", "false").foreach { value => withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) { @@ -1522,6 +1482,46 @@ class HiveDDLSuite } } + test("create hive serde table with new syntax - parquet") { + withTable("t", "t2", "t3") { + withTempPath { path => + sql( + s""" + |CREATE TABLE t(id int) USING hive + |OPTIONS(fileFormat 'parquet', compression 'gzip') + |LOCATION '${path.toURI}' + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(DDLUtils.isHiveTable(table)) + assert(table.storage.serde == + Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + assert(table.storage.properties.get("compression") == Some("gzip")) + assert(spark.table("t").collect().isEmpty) + + sql("INSERT INTO t SELECT 1") + checkAnswer(spark.table("t"), Row(1)) + val maybeParquetFile = path.listFiles().find(f => f.getName.startsWith("part")) + assert(maybeParquetFile.isDefined) + + val footer = ParquetFileReader.readFooter( + sparkContext.hadoopConfiguration, + new Path(maybeParquetFile.get.getPath), + NO_FILTER) + assert("GZIP" === 
footer.getBlocks.get(0).getColumns().get(0).getCodec.toString) + + sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") + val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) + assert(DDLUtils.isHiveTable(table2)) + assert(table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + checkAnswer(spark.table("t2"), Row(1, "a")) + + sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)") + sql("INSERT INTO t3 PARTITION(p=1) SELECT 0") + checkAnswer(spark.table("t3"), Row(0, 1)) + } + } + } + test("create hive serde table with Catalog") { withTable("t") { withTempDir { dir => From 452a00376e0cf6e743da18f80cc44900390df948 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 2 Oct 2017 11:23:52 -0700 Subject: [PATCH 4/5] Update test cases. --- .../sql/hive/execution/HiveDDLSuite.scala | 120 +++++++++--------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index f85750ce034a..8a613ffaadd4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -33,7 +33,8 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils} -import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} +import org.apache.spark.sql.hive.HiveExternalCatalog +import org.apache.spark.sql.hive.HiveUtils.{CONVERT_METASTORE_ORC, CONVERT_METASTORE_PARQUET} import org.apache.spark.sql.hive.orc.OrcFileOperator import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -1439,75 +1440,30 @@ class HiveDDLSuite } } - test("create hive serde table with new syntax - orc") { - Seq("true", "false").foreach { value => - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) { - withTable("t", "t2", "t3") { - withTempPath { path => - sql( - s""" - |CREATE TABLE t(id int) USING hive - |OPTIONS(fileFormat 'orc', compression 'Zlib') - |LOCATION '${path.toURI}' - """.stripMargin) - val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) - assert(DDLUtils.isHiveTable(table)) - assert(table.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) - assert(table.storage.properties.get("compression") == Some("Zlib")) - assert(spark.table("t").collect().isEmpty) - - sql("INSERT INTO t SELECT 1") - checkAnswer(spark.table("t"), Row(1)) - // Check if this is compressed as ZLIB. 
- val maybeOrcFile = path.listFiles().find(_.getName.startsWith("part")) - assert(maybeOrcFile.isDefined) - val orcFilePath = maybeOrcFile.get.toPath.toString - val expectedCompressionKind = - OrcFileOperator.getFileReader(orcFilePath).get.getCompression - assert("ZLIB" === expectedCompressionKind.name()) - - sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") - val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) - assert(DDLUtils.isHiveTable(table2)) - assert( - table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - checkAnswer(spark.table("t2"), Row(1, "a")) - - sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)") - sql("INSERT INTO t3 PARTITION(p=1) SELECT 0") - checkAnswer(spark.table("t3"), Row(0, 1)) - } - } - } - } - } - - test("create hive serde table with new syntax - parquet") { + test("create hive serde table with new syntax") { withTable("t", "t2", "t3") { withTempPath { path => sql( s""" - |CREATE TABLE t(id int) USING hive - |OPTIONS(fileFormat 'parquet', compression 'gzip') - |LOCATION '${path.toURI}' - """.stripMargin) + |CREATE TABLE t(id int) USING hive + |OPTIONS(fileFormat 'orc', compression 'Zlib') + |LOCATION '${path.toURI}' + """.stripMargin) val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) assert(DDLUtils.isHiveTable(table)) - assert(table.storage.serde == - Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) - assert(table.storage.properties.get("compression") == Some("gzip")) + assert(table.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + assert(table.storage.properties.get("compression") == Some("Zlib")) assert(spark.table("t").collect().isEmpty) sql("INSERT INTO t SELECT 1") checkAnswer(spark.table("t"), Row(1)) - val maybeParquetFile = path.listFiles().find(f => f.getName.startsWith("part")) - assert(maybeParquetFile.isDefined) - - val footer = ParquetFileReader.readFooter( - sparkContext.hadoopConfiguration, - new Path(maybeParquetFile.get.getPath), - NO_FILTER) - assert("GZIP" === footer.getBlocks.get(0).getColumns().get(0).getCodec.toString) + // Check if this is compressed as ZLIB. + val maybeOrcFile = path.listFiles().find(_.getName.startsWith("part")) + assert(maybeOrcFile.isDefined) + val orcFilePath = maybeOrcFile.get.toPath.toString + val expectedCompressionKind = + OrcFileOperator.getFileReader(orcFilePath).get.getCompression + assert("ZLIB" === expectedCompressionKind.name()) sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) @@ -2056,4 +2012,48 @@ class HiveDDLSuite } } } + + private def assertCompression(maybeFile: Option[File], format: String, compression: String) = { + assert(maybeFile.isDefined) + + val actualCompression = format match { + case "orc" => + OrcFileOperator.getFileReader(maybeFile.get.toPath.toString).get.getCompression.name + + case "parquet" => + val footer = ParquetFileReader.readFooter( + sparkContext.hadoopConfiguration, new Path(maybeFile.get.getPath), NO_FILTER) + footer.getBlocks.get(0).getColumns.get(0).getCodec.toString + } + + assert(compression === actualCompression) + } + + // Since ORC uses 'ZLIB' and Parquet uses 'SNAPPY' by default, we test with different formats. 
+ Seq(("orc", "SNAPPY"), ("parquet", "GZIP")).foreach { case (fileFormat, compression) => + test(s"SPARK-22158 convertMetastore should not ignore table property - $fileFormat") { + withSQLConf(CONVERT_METASTORE_ORC.key -> "true", CONVERT_METASTORE_PARQUET.key -> "true") { + withTable("t") { + withTempPath { path => + sql( + s""" + |CREATE TABLE t(id int) USING hive + |OPTIONS(fileFormat '$fileFormat', compression '$compression') + |LOCATION '${path.toURI}' + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(DDLUtils.isHiveTable(table)) + assert(table.storage.serde.get.contains(fileFormat)) + assert(table.storage.properties.get("compression") == Some(compression)) + assert(spark.table("t").collect().isEmpty) + + sql("INSERT INTO t SELECT 1") + checkAnswer(spark.table("t"), Row(1)) + val maybeFile = path.listFiles().find(_.getName.startsWith("part")) + assertCompression(maybeFile, fileFormat, compression) + } + } + } + } + } } From 17ccf9aff9953af421b48262f8fd6af582bddf0a Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 2 Oct 2017 11:39:13 -0700 Subject: [PATCH 5/5] fix --- .../apache/spark/sql/hive/execution/HiveDDLSuite.scala | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 8a613ffaadd4..02e26bbe876a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -1459,11 +1459,7 @@ class HiveDDLSuite checkAnswer(spark.table("t"), Row(1)) // Check if this is compressed as ZLIB. val maybeOrcFile = path.listFiles().find(_.getName.startsWith("part")) - assert(maybeOrcFile.isDefined) - val orcFilePath = maybeOrcFile.get.toPath.toString - val expectedCompressionKind = - OrcFileOperator.getFileReader(orcFilePath).get.getCompression - assert("ZLIB" === expectedCompressionKind.name()) + assertCompression(maybeOrcFile, "orc", "ZLIB") sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) @@ -2029,8 +2025,7 @@ class HiveDDLSuite assert(compression === actualCompression) } - // Since ORC uses 'ZLIB' and Parquet uses 'SNAPPY' by default, we test with different formats. - Seq(("orc", "SNAPPY"), ("parquet", "GZIP")).foreach { case (fileFormat, compression) => + Seq(("orc", "ZLIB"), ("parquet", "GZIP")).foreach { case (fileFormat, compression) => test(s"SPARK-22158 convertMetastore should not ignore table property - $fileFormat") { withSQLConf(CONVERT_METASTORE_ORC.key -> "true", CONVERT_METASTORE_PARQUET.key -> "true") { withTable("t") {