Skip to content

Commit 3ee3b2a

Browse files
dongjoon-hyun authored and gatorsmile committed
[SPARK-23340][SQL] Upgrade Apache ORC to 1.4.3
## What changes were proposed in this pull request?

This PR updates the Apache ORC dependencies to 1.4.3, released on February 9th. The Apache ORC 1.4.2 release removes unnecessary dependencies, and 1.4.3 has 5 more patches (https://s.apache.org/Fll8).

In particular, the following issue, ORC-285, is fixed in 1.4.3.

```scala
scala> val df = Seq(Array.empty[Float]).toDF()
scala> df.write.format("orc").save("/tmp/floatarray")
scala> spark.read.orc("/tmp/floatarray")
res1: org.apache.spark.sql.DataFrame = [value: array<float>]
scala> spark.read.orc("/tmp/floatarray").show()
18/02/12 22:09:10 ERROR Executor: Exception in task 0.0 in stage 1.0 (TID 1)
java.io.IOException: Error reading file: file:/tmp/floatarray/part-00000-9c0b461b-4df1-4c23-aac1-3e4f349ac7d6-c000.snappy.orc
	at org.apache.orc.impl.RecordReaderImpl.nextBatch(RecordReaderImpl.java:1191)
	at org.apache.orc.mapreduce.OrcMapreduceRecordReader.ensureBatch(OrcMapreduceRecordReader.java:78)
...
Caused by: java.io.EOFException: Read past EOF for compressed stream Stream for column 2 kind DATA position: 0 length: 0 range: 0 offset: 0 limit: 0
```

## How was this patch tested?

Pass the Jenkins test.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #20511 from dongjoon-hyun/SPARK-23340.
1 parent 15ad4a7 commit 3ee3b2a

File tree

5 files changed

+24
-9
lines changed

5 files changed

+24
-9
lines changed

dev/deps/spark-deps-hadoop-2.6

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -157,8 +157,8 @@ objenesis-2.1.jar
157157
okhttp-3.8.1.jar
158158
okio-1.13.0.jar
159159
opencsv-2.3.jar
160-
orc-core-1.4.1-nohive.jar
161-
orc-mapreduce-1.4.1-nohive.jar
160+
orc-core-1.4.3-nohive.jar
161+
orc-mapreduce-1.4.3-nohive.jar
162162
oro-2.0.8.jar
163163
osgi-resource-locator-1.0.1.jar
164164
paranamer-2.8.jar

dev/deps/spark-deps-hadoop-2.7

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -158,8 +158,8 @@ objenesis-2.1.jar
158158
okhttp-3.8.1.jar
159159
okio-1.13.0.jar
160160
opencsv-2.3.jar
161-
orc-core-1.4.1-nohive.jar
162-
orc-mapreduce-1.4.1-nohive.jar
161+
orc-core-1.4.3-nohive.jar
162+
orc-mapreduce-1.4.3-nohive.jar
163163
oro-2.0.8.jar
164164
osgi-resource-locator-1.0.1.jar
165165
paranamer-2.8.jar

pom.xml

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,7 @@
130130
<hive.version.short>1.2.1</hive.version.short>
131131
<derby.version>10.12.1.1</derby.version>
132132
<parquet.version>1.8.2</parquet.version>
133-
<orc.version>1.4.1</orc.version>
133+
<orc.version>1.4.3</orc.version>
134134
<orc.classifier>nohive</orc.classifier>
135135
<hive.parquet.version>1.6.0</hive.parquet.version>
136136
<jetty.version>9.3.20.v20170531</jetty.version>
@@ -1740,10 +1740,6 @@
17401740
<groupId>org.apache.hive</groupId>
17411741
<artifactId>hive-storage-api</artifactId>
17421742
</exclusion>
1743-
<exclusion>
1744-
<groupId>io.airlift</groupId>
1745-
<artifactId>slice</artifactId>
1746-
</exclusion>
17471743
</exclusions>
17481744
</dependency>
17491745
<dependency>

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -160,6 +160,15 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
160160
}
161161
}
162162
}
163+
164+
test("SPARK-23340 Empty float/double array columns raise EOFException") {
165+
Seq(Seq(Array.empty[Float]).toDF(), Seq(Array.empty[Double]).toDF()).foreach { df =>
166+
withTempPath { path =>
167+
df.write.format("orc").save(path.getCanonicalPath)
168+
checkAnswer(spark.read.orc(path.getCanonicalPath), df)
169+
}
170+
}
171+
}
163172
}
164173

165174
class OrcSourceSuite extends OrcSuite with SharedSQLContext {

sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -208,4 +208,14 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton {
208208
}
209209
}
210210
}
211+
212+
test("SPARK-23340 Empty float/double array columns raise EOFException") {
213+
withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "false") {
214+
withTable("spark_23340") {
215+
sql("CREATE TABLE spark_23340(a array<float>, b array<double>) STORED AS ORC")
216+
sql("INSERT INTO spark_23340 VALUES (array(), array())")
217+
checkAnswer(spark.table("spark_23340"), Seq(Row(Array.empty[Float], Array.empty[Double])))
218+
}
219+
}
220+
}
211221
}

0 commit comments

Comments
 (0)