
Commit 8bc420a (parent: 2393e1d)

Use spark.sql.orc.impl.

6 files changed: +19 −18 lines

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
Lines changed: 6 additions & 6 deletions

@@ -363,13 +363,13 @@ object SQLConf {
     .checkValues(Set("none", "uncompressed", "snappy", "zlib", "lzo"))
     .createWithDefault("snappy")
 
-  val ORC_USE_NEW_VERSION = buildConf("spark.sql.orc.useNewVersion")
-    .doc("When true, use new OrcFileFormat in sql/core module instead of the one in sql/hive. " +
-      "Since new OrcFileFormat uses Apache ORC library instead of ORC library Hive 1.2.1, it is " +
-      "more stable and faster.")
+  val ORC_IMPLEMENTATION = buildConf("spark.sql.orc.impl")
+    .doc("When native, use the native version of ORC support instead of the ORC library in Hive " +
+      "1.2.1. It is 'hive' by default prior to Spark 2.3.")
     .internal()
-    .booleanConf
-    .createWithDefault(true)
+    .stringConf
+    .checkValues(Set("hive", "native"))
+    .createWithDefault("native")
 
   val ORC_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.orc.filterPushdown")
     .doc("When true, enable filter pushdown for ORC files.")

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
Lines changed: 5 additions & 4 deletions

@@ -540,7 +540,7 @@ object DataSource extends Logging {
     val csv = classOf[CSVFileFormat].getCanonicalName
     val libsvm = "org.apache.spark.ml.source.libsvm.LibSVMFileFormat"
     val orc = "org.apache.spark.sql.hive.orc.OrcFileFormat"
-    val newOrc = classOf[OrcFileFormat].getCanonicalName
+    val nativeOrc = classOf[OrcFileFormat].getCanonicalName
 
     Map(
       "org.apache.spark.sql.jdbc" -> jdbc,
@@ -557,8 +557,8 @@ object DataSource extends Logging {
       "org.apache.spark.sql.execution.datasources.parquet.DefaultSource" -> parquet,
       "org.apache.spark.sql.hive.orc.DefaultSource" -> orc,
       "org.apache.spark.sql.hive.orc" -> orc,
-      "org.apache.spark.sql.execution.datasources.orc.DefaultSource" -> newOrc,
-      "org.apache.spark.sql.execution.datasources.orc" -> newOrc,
+      "org.apache.spark.sql.execution.datasources.orc.DefaultSource" -> nativeOrc,
+      "org.apache.spark.sql.execution.datasources.orc" -> nativeOrc,
       "org.apache.spark.ml.source.libsvm.DefaultSource" -> libsvm,
       "org.apache.spark.ml.source.libsvm" -> libsvm,
       "com.databricks.spark.csv" -> csv
@@ -576,7 +576,8 @@ object DataSource extends Logging {
   /** Given a provider name, look up the data source class definition. */
   def lookupDataSource(provider: String, conf: SQLConf): Class[_] = {
     val provider1 = backwardCompatibilityMap.getOrElse(provider, provider) match {
-      case name if name.equalsIgnoreCase("orc") && conf.getConf(SQLConf.ORC_USE_NEW_VERSION) =>
+      case name if name.equalsIgnoreCase("orc") &&
+          conf.getConf(SQLConf.ORC_IMPLEMENTATION) == "native" =>
        classOf[OrcFileFormat].getCanonicalName
      case name => name
    }
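The lookupDataSource change is where the new config takes effect: the short name "orc" resolves to the sql/core OrcFileFormat only when the implementation is "native". A rough sketch of exercising the method directly, assuming a sql/core test classpath (the assertions reflect the behavior shown in the tests below):

import org.apache.spark.sql.execution.datasources.DataSource
import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat
import org.apache.spark.sql.internal.SQLConf

val conf = new SQLConf

// With the native implementation, "orc" maps to the sql/core format class.
conf.setConfString(SQLConf.ORC_IMPLEMENTATION.key, "native")
assert(DataSource.lookupDataSource("orc", conf) == classOf[OrcFileFormat])

// With "hive", the short name passes through unchanged; on a classpath
// without sql/hive this later surfaces as "The ORC data source must be used
// with Hive support enabled", as the SQLQuerySuite test below shows.
conf.setConfString(SQLConf.ORC_IMPLEMENTATION.key, "hive")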

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Lines changed: 2 additions & 2 deletions

@@ -2786,14 +2786,14 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
   }
 
   test("SPARK-20728 Make ORCFileFormat configurable between sql/hive and sql/core") {
-    withSQLConf(SQLConf.ORC_USE_NEW_VERSION.key -> "false") {
+    withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> "hive") {
       val e = intercept[AnalysisException] {
         sql("CREATE TABLE spark_20728(a INT) USING ORC")
       }
       assert(e.message.contains("The ORC data source must be used with Hive support enabled"))
     }
 
-    withSQLConf(SQLConf.ORC_USE_NEW_VERSION.key -> "true") {
+    withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> "native") {
       withTable("spark_20728") {
         sql("CREATE TABLE spark_20728(a INT) USING ORC")
         val fileFormat = sql("SELECT * FROM spark_20728").queryExecution.analyzed.collectFirst {

sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
Lines changed: 2 additions & 2 deletions

@@ -478,7 +478,7 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be
   }
 
   test("orc - API and behavior regarding schema") {
-    withSQLConf(SQLConf.ORC_USE_NEW_VERSION.key -> "true") {
+    withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> "native") {
       // Writer
       spark.createDataset(data).toDF("str").write.mode(SaveMode.Overwrite).orc(dir)
       val df = spark.read.orc(dir)
@@ -507,7 +507,7 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be
   }
 
   test("column nullability and comment - write and then read") {
-    withSQLConf(SQLConf.ORC_USE_NEW_VERSION.key -> "true") {
+    withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> "native") {
       Seq("json", "orc", "parquet", "csv").foreach { format =>
         val schema = StructType(
           StructField("cl1", IntegerType, nullable = false).withComment("test") ::

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
Lines changed: 1 addition & 1 deletion

@@ -194,7 +194,7 @@ case class RelationConversions(
         .convertToLogicalRelation(relation, options, classOf[ParquetFileFormat], "parquet")
     } else {
       val options = relation.tableMeta.storage.properties
-      if (conf.getConf(SQLConf.ORC_USE_NEW_VERSION)) {
+      if (conf.getConf(SQLConf.ORC_IMPLEMENTATION) == "native") {
         sessionCatalog.metastoreCatalog.convertToLogicalRelation(
           relation,
           options,
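On the Hive side, the same config now selects which OrcFileFormat a metastore ORC table is converted to when relation conversion is enabled. A hedged sketch of observing this from a Hive-enabled session (the table name and the explicit convertMetastoreOrc setting are illustrative assumptions, not part of this commit):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .master("local[*]")
  .enableHiveSupport()
  .getOrCreate()

// Conversion of metastore ORC tables is gated separately from the impl choice.
spark.conf.set("spark.sql.hive.convertMetastoreOrc", "true")
spark.conf.set("spark.sql.orc.impl", "native")

spark.sql("CREATE TABLE hive_orc_demo (a INT) STORED AS ORC")

// With impl = native the scan should plan the sql/core
// org.apache.spark.sql.execution.datasources.orc.OrcFileFormat;
// with impl = hive it falls back to org.apache.spark.sql.hive.orc.OrcFileFormat.
spark.sql("SELECT * FROM hive_orc_demo").explain()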

sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
Lines changed: 3 additions & 3 deletions

@@ -624,10 +624,10 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
 
   test("SPARK-20728 Make ORCFileFormat configurable between sql/hive and sql/core") {
     Seq(
-      (true, classOf[org.apache.spark.sql.execution.datasources.orc.OrcFileFormat]),
-      (false, classOf[org.apache.spark.sql.hive.orc.OrcFileFormat])).foreach { case (v, format) =>
+      ("native", classOf[org.apache.spark.sql.execution.datasources.orc.OrcFileFormat]),
+      ("hive", classOf[org.apache.spark.sql.hive.orc.OrcFileFormat])).foreach { case (i, format) =>
 
-      withSQLConf(SQLConf.ORC_USE_NEW_VERSION.key -> s"$v") {
+      withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> i) {
         withTable("spark_20728") {
           sql("CREATE TABLE spark_20728(a INT) USING ORC")
           val fileFormat = sql("SELECT * FROM spark_20728").queryExecution.analyzed.collectFirst {
