From 09c621bae172562b9dba0da91a527465d0ad7702 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 28 Sep 2017 10:23:52 -0700 Subject: [PATCH 1/5] [SPARK-22158][SQL] convertMetastoreOrc should not ignore table properties --- .../spark/sql/hive/HiveStrategies.scala | 2 +- .../sql/hive/execution/HiveDDLSuite.scala | 73 ++++++++++--------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 805b3171cdaa..31a7fac0c422 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -194,7 +194,7 @@ case class RelationConversions( sessionCatalog.metastoreCatalog .convertToLogicalRelation(relation, options, classOf[ParquetFileFormat], "parquet") } else { - val options = Map[String, String]() + val options = relation.tableMeta.storage.properties sessionCatalog.metastoreCatalog .convertToLogicalRelation(relation, options, classOf[OrcFileFormat], "orc") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 668da5fb4732..feaeb06c02db 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils} -import org.apache.spark.sql.hive.HiveExternalCatalog +import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} import org.apache.spark.sql.hive.orc.OrcFileOperator import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -1438,39 +1438,44 @@ class HiveDDLSuite } test("create hive serde table with new syntax") { - withTable("t", "t2", "t3") { - withTempPath { path => - sql( - s""" - |CREATE TABLE t(id int) USING hive - |OPTIONS(fileFormat 'orc', compression 'Zlib') - |LOCATION '${path.toURI}' - """.stripMargin) - val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) - assert(DDLUtils.isHiveTable(table)) - assert(table.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) - assert(table.storage.properties.get("compression") == Some("Zlib")) - assert(spark.table("t").collect().isEmpty) - - sql("INSERT INTO t SELECT 1") - checkAnswer(spark.table("t"), Row(1)) - // Check if this is compressed as ZLIB. 
- val maybeOrcFile = path.listFiles().find(!_.getName.endsWith(".crc")) - assert(maybeOrcFile.isDefined) - val orcFilePath = maybeOrcFile.get.toPath.toString - val expectedCompressionKind = - OrcFileOperator.getFileReader(orcFilePath).get.getCompression - assert("ZLIB" === expectedCompressionKind.name()) - - sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") - val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) - assert(DDLUtils.isHiveTable(table2)) - assert(table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - checkAnswer(spark.table("t2"), Row(1, "a")) - - sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)") - sql("INSERT INTO t3 PARTITION(p=1) SELECT 0") - checkAnswer(spark.table("t3"), Row(0, 1)) + Seq("true", "false").foreach { value => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) { + withTable("t", "t2", "t3") { + withTempPath { path => + sql( + s""" + |CREATE TABLE t(id int) USING hive + |OPTIONS(fileFormat 'orc', compression 'Zlib') + |LOCATION '${path.toURI}' + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(DDLUtils.isHiveTable(table)) + assert(table.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + assert(table.storage.properties.get("compression") == Some("Zlib")) + assert(spark.table("t").collect().isEmpty) + + sql("INSERT INTO t SELECT 1") + checkAnswer(spark.table("t"), Row(1)) + // Check if this is compressed as ZLIB. + val maybeOrcFile = path.listFiles().find(_.getName.startsWith("part")) + assert(maybeOrcFile.isDefined) + val orcFilePath = maybeOrcFile.get.toPath.toString + val expectedCompressionKind = + OrcFileOperator.getFileReader(orcFilePath).get.getCompression + assert("ZLIB" === expectedCompressionKind.name()) + + sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") + val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) + assert(DDLUtils.isHiveTable(table2)) + assert( + table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + checkAnswer(spark.table("t2"), Row(1, "a")) + + sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)") + sql("INSERT INTO t3 PARTITION(p=1) SELECT 0") + checkAnswer(spark.table("t3"), Row(0, 1)) + } + } } } } From 144868eeda4e02c92bb048d9c46625dea28aa3cb Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 29 Sep 2017 10:06:54 -0700 Subject: [PATCH 2/5] fix parquet, too. 
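The Parquet conversion path in RelationConversions has the same problem as ORC:
convert() built its options from the schema-merging flag alone, so the Hive
table's storage properties (for example a user-specified 'compression') never
reached ParquetFileFormat. A minimal repro sketch, assuming
spark.sql.hive.convertMetastoreParquet is enabled (the default); the table name
and location below are illustrative only:

    // Create a Hive Parquet table that asks for gzip compression.
    spark.sql(
      """
        |CREATE TABLE t(id int) USING hive
        |OPTIONS(fileFormat 'parquet', compression 'gzip')
        |LOCATION '/tmp/spark_22158_parquet'
      """.stripMargin)
    spark.sql("INSERT INTO t SELECT 1")
    // Without this patch the written part files use the session's default
    // Parquet codec rather than gzip, because the table's 'compression'
    // property is dropped when the relation is converted to a data source
    // relation.

This patch propagates relation.tableMeta.storage.properties into the options
map (MERGE_SCHEMA still takes its value from the session configuration) and
adds a Parquet variant of the new-syntax DDL test that checks the compression
codec recorded in the written file's footer.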
--- .../spark/sql/hive/HiveStrategies.scala | 2 +- .../sql/hive/execution/HiveDDLSuite.scala | 44 ++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 31a7fac0c422..3592b8f4846d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -189,7 +189,7 @@ case class RelationConversions( private def convert(relation: HiveTableRelation): LogicalRelation = { val serde = relation.tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) if (serde.contains("parquet")) { - val options = Map(ParquetOptions.MERGE_SCHEMA -> + val options = relation.tableMeta.storage.properties + (ParquetOptions.MERGE_SCHEMA -> conf.getConf(HiveUtils.CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING).toString) sessionCatalog.metastoreCatalog .convertToLogicalRelation(relation, options, classOf[ParquetFileFormat], "parquet") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index feaeb06c02db..a7edf10974e2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -23,6 +23,8 @@ import java.net.URI import scala.language.existentials import org.apache.hadoop.fs.Path +import org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER +import org.apache.parquet.hadoop.ParquetFileReader import org.scalatest.BeforeAndAfterEach import org.apache.spark.SparkException @@ -1437,7 +1439,47 @@ class HiveDDLSuite } } - test("create hive serde table with new syntax") { + test("create hive serde table with new syntax - parquet") { + withTable("t", "t2", "t3") { + withTempPath { path => + sql( + s""" + |CREATE TABLE t(id int) USING hive + |OPTIONS(fileFormat 'parquet', compression 'gzip') + |LOCATION '${path.toURI}' + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(DDLUtils.isHiveTable(table)) + assert(table.storage.serde == + Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + assert(table.storage.properties.get("compression") == Some("gzip")) + assert(spark.table("t").collect().isEmpty) + + sql("INSERT INTO t SELECT 1") + checkAnswer(spark.table("t"), Row(1)) + val maybeParquetFile = path.listFiles().find(f => f.getName.startsWith("part")) + assert(maybeParquetFile.isDefined) + + val footer = ParquetFileReader.readFooter( + sparkContext.hadoopConfiguration, + new Path(maybeParquetFile.get.getPath), + NO_FILTER) + assert("GZIP" === footer.getBlocks.get(0).getColumns().get(0).getCodec.toString) + + sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") + val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) + assert(DDLUtils.isHiveTable(table2)) + assert(table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + checkAnswer(spark.table("t2"), Row(1, "a")) + + sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)") + sql("INSERT INTO t3 PARTITION(p=1) SELECT 0") + checkAnswer(spark.table("t3"), Row(0, 1)) + } + } + } + + test("create hive serde table with new syntax - orc") { Seq("true", "false").foreach { value => 
withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) { withTable("t", "t2", "t3") { From d218c98d7af75ffc13e9900a9370885f83f55dc6 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 29 Sep 2017 10:09:25 -0700 Subject: [PATCH 3/5] move test case position for review. --- .../sql/hive/execution/HiveDDLSuite.scala | 80 +++++++++---------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index a7edf10974e2..f85750ce034a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -1439,46 +1439,6 @@ class HiveDDLSuite } } - test("create hive serde table with new syntax - parquet") { - withTable("t", "t2", "t3") { - withTempPath { path => - sql( - s""" - |CREATE TABLE t(id int) USING hive - |OPTIONS(fileFormat 'parquet', compression 'gzip') - |LOCATION '${path.toURI}' - """.stripMargin) - val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) - assert(DDLUtils.isHiveTable(table)) - assert(table.storage.serde == - Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) - assert(table.storage.properties.get("compression") == Some("gzip")) - assert(spark.table("t").collect().isEmpty) - - sql("INSERT INTO t SELECT 1") - checkAnswer(spark.table("t"), Row(1)) - val maybeParquetFile = path.listFiles().find(f => f.getName.startsWith("part")) - assert(maybeParquetFile.isDefined) - - val footer = ParquetFileReader.readFooter( - sparkContext.hadoopConfiguration, - new Path(maybeParquetFile.get.getPath), - NO_FILTER) - assert("GZIP" === footer.getBlocks.get(0).getColumns().get(0).getCodec.toString) - - sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") - val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) - assert(DDLUtils.isHiveTable(table2)) - assert(table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - checkAnswer(spark.table("t2"), Row(1, "a")) - - sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)") - sql("INSERT INTO t3 PARTITION(p=1) SELECT 0") - checkAnswer(spark.table("t3"), Row(0, 1)) - } - } - } - test("create hive serde table with new syntax - orc") { Seq("true", "false").foreach { value => withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) { @@ -1522,6 +1482,46 @@ class HiveDDLSuite } } + test("create hive serde table with new syntax - parquet") { + withTable("t", "t2", "t3") { + withTempPath { path => + sql( + s""" + |CREATE TABLE t(id int) USING hive + |OPTIONS(fileFormat 'parquet', compression 'gzip') + |LOCATION '${path.toURI}' + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(DDLUtils.isHiveTable(table)) + assert(table.storage.serde == + Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + assert(table.storage.properties.get("compression") == Some("gzip")) + assert(spark.table("t").collect().isEmpty) + + sql("INSERT INTO t SELECT 1") + checkAnswer(spark.table("t"), Row(1)) + val maybeParquetFile = path.listFiles().find(f => f.getName.startsWith("part")) + assert(maybeParquetFile.isDefined) + + val footer = ParquetFileReader.readFooter( + sparkContext.hadoopConfiguration, + new Path(maybeParquetFile.get.getPath), + NO_FILTER) + assert("GZIP" === 
footer.getBlocks.get(0).getColumns().get(0).getCodec.toString) + + sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") + val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) + assert(DDLUtils.isHiveTable(table2)) + assert(table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + checkAnswer(spark.table("t2"), Row(1, "a")) + + sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)") + sql("INSERT INTO t3 PARTITION(p=1) SELECT 0") + checkAnswer(spark.table("t3"), Row(0, 1)) + } + } + } + test("create hive serde table with Catalog") { withTable("t") { withTempDir { dir => From 452a00376e0cf6e743da18f80cc44900390df948 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 2 Oct 2017 11:23:52 -0700 Subject: [PATCH 4/5] Update test cases. --- .../sql/hive/execution/HiveDDLSuite.scala | 120 +++++++++--------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index f85750ce034a..8a613ffaadd4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -33,7 +33,8 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils} -import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} +import org.apache.spark.sql.hive.HiveExternalCatalog +import org.apache.spark.sql.hive.HiveUtils.{CONVERT_METASTORE_ORC, CONVERT_METASTORE_PARQUET} import org.apache.spark.sql.hive.orc.OrcFileOperator import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -1439,75 +1440,30 @@ class HiveDDLSuite } } - test("create hive serde table with new syntax - orc") { - Seq("true", "false").foreach { value => - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) { - withTable("t", "t2", "t3") { - withTempPath { path => - sql( - s""" - |CREATE TABLE t(id int) USING hive - |OPTIONS(fileFormat 'orc', compression 'Zlib') - |LOCATION '${path.toURI}' - """.stripMargin) - val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) - assert(DDLUtils.isHiveTable(table)) - assert(table.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) - assert(table.storage.properties.get("compression") == Some("Zlib")) - assert(spark.table("t").collect().isEmpty) - - sql("INSERT INTO t SELECT 1") - checkAnswer(spark.table("t"), Row(1)) - // Check if this is compressed as ZLIB. 
- val maybeOrcFile = path.listFiles().find(_.getName.startsWith("part")) - assert(maybeOrcFile.isDefined) - val orcFilePath = maybeOrcFile.get.toPath.toString - val expectedCompressionKind = - OrcFileOperator.getFileReader(orcFilePath).get.getCompression - assert("ZLIB" === expectedCompressionKind.name()) - - sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") - val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) - assert(DDLUtils.isHiveTable(table2)) - assert( - table2.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - checkAnswer(spark.table("t2"), Row(1, "a")) - - sql("CREATE TABLE t3(a int, p int) USING hive PARTITIONED BY (p)") - sql("INSERT INTO t3 PARTITION(p=1) SELECT 0") - checkAnswer(spark.table("t3"), Row(0, 1)) - } - } - } - } - } - - test("create hive serde table with new syntax - parquet") { + test("create hive serde table with new syntax") { withTable("t", "t2", "t3") { withTempPath { path => sql( s""" - |CREATE TABLE t(id int) USING hive - |OPTIONS(fileFormat 'parquet', compression 'gzip') - |LOCATION '${path.toURI}' - """.stripMargin) + |CREATE TABLE t(id int) USING hive + |OPTIONS(fileFormat 'orc', compression 'Zlib') + |LOCATION '${path.toURI}' + """.stripMargin) val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) assert(DDLUtils.isHiveTable(table)) - assert(table.storage.serde == - Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) - assert(table.storage.properties.get("compression") == Some("gzip")) + assert(table.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + assert(table.storage.properties.get("compression") == Some("Zlib")) assert(spark.table("t").collect().isEmpty) sql("INSERT INTO t SELECT 1") checkAnswer(spark.table("t"), Row(1)) - val maybeParquetFile = path.listFiles().find(f => f.getName.startsWith("part")) - assert(maybeParquetFile.isDefined) - - val footer = ParquetFileReader.readFooter( - sparkContext.hadoopConfiguration, - new Path(maybeParquetFile.get.getPath), - NO_FILTER) - assert("GZIP" === footer.getBlocks.get(0).getColumns().get(0).getCodec.toString) + // Check if this is compressed as ZLIB. + val maybeOrcFile = path.listFiles().find(_.getName.startsWith("part")) + assert(maybeOrcFile.isDefined) + val orcFilePath = maybeOrcFile.get.toPath.toString + val expectedCompressionKind = + OrcFileOperator.getFileReader(orcFilePath).get.getCompression + assert("ZLIB" === expectedCompressionKind.name()) sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) @@ -2056,4 +2012,48 @@ class HiveDDLSuite } } } + + private def assertCompression(maybeFile: Option[File], format: String, compression: String) = { + assert(maybeFile.isDefined) + + val actualCompression = format match { + case "orc" => + OrcFileOperator.getFileReader(maybeFile.get.toPath.toString).get.getCompression.name + + case "parquet" => + val footer = ParquetFileReader.readFooter( + sparkContext.hadoopConfiguration, new Path(maybeFile.get.getPath), NO_FILTER) + footer.getBlocks.get(0).getColumns.get(0).getCodec.toString + } + + assert(compression === actualCompression) + } + + // Since ORC uses 'ZLIB' and Parquet uses 'SNAPPY' by default, we test with different formats. 
+ Seq(("orc", "SNAPPY"), ("parquet", "GZIP")).foreach { case (fileFormat, compression) => + test(s"SPARK-22158 convertMetastore should not ignore table property - $fileFormat") { + withSQLConf(CONVERT_METASTORE_ORC.key -> "true", CONVERT_METASTORE_PARQUET.key -> "true") { + withTable("t") { + withTempPath { path => + sql( + s""" + |CREATE TABLE t(id int) USING hive + |OPTIONS(fileFormat '$fileFormat', compression '$compression') + |LOCATION '${path.toURI}' + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(DDLUtils.isHiveTable(table)) + assert(table.storage.serde.get.contains(fileFormat)) + assert(table.storage.properties.get("compression") == Some(compression)) + assert(spark.table("t").collect().isEmpty) + + sql("INSERT INTO t SELECT 1") + checkAnswer(spark.table("t"), Row(1)) + val maybeFile = path.listFiles().find(_.getName.startsWith("part")) + assertCompression(maybeFile, fileFormat, compression) + } + } + } + } + } } From 17ccf9aff9953af421b48262f8fd6af582bddf0a Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 2 Oct 2017 11:39:13 -0700 Subject: [PATCH 5/5] fix --- .../apache/spark/sql/hive/execution/HiveDDLSuite.scala | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 8a613ffaadd4..02e26bbe876a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -1459,11 +1459,7 @@ class HiveDDLSuite checkAnswer(spark.table("t"), Row(1)) // Check if this is compressed as ZLIB. val maybeOrcFile = path.listFiles().find(_.getName.startsWith("part")) - assert(maybeOrcFile.isDefined) - val orcFilePath = maybeOrcFile.get.toPath.toString - val expectedCompressionKind = - OrcFileOperator.getFileReader(orcFilePath).get.getCompression - assert("ZLIB" === expectedCompressionKind.name()) + assertCompression(maybeOrcFile, "orc", "ZLIB") sql("CREATE TABLE t2 USING HIVE AS SELECT 1 AS c1, 'a' AS c2") val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2")) @@ -2029,8 +2025,7 @@ class HiveDDLSuite assert(compression === actualCompression) } - // Since ORC uses 'ZLIB' and Parquet uses 'SNAPPY' by default, we test with different formats. - Seq(("orc", "SNAPPY"), ("parquet", "GZIP")).foreach { case (fileFormat, compression) => + Seq(("orc", "ZLIB"), ("parquet", "GZIP")).foreach { case (fileFormat, compression) => test(s"SPARK-22158 convertMetastore should not ignore table property - $fileFormat") { withSQLConf(CONVERT_METASTORE_ORC.key -> "true", CONVERT_METASTORE_PARQUET.key -> "true") { withTable("t") {