You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/02/17 08:25:42 UTC
spark git commit: [SPARK-23340][SQL] Upgrade Apache ORC to 1.4.3
Repository: spark
Updated Branches:
refs/heads/master 15ad4a7f1 -> 3ee3b2ae1
[SPARK-23340][SQL] Upgrade Apache ORC to 1.4.3
## What changes were proposed in this pull request?
This PR updates the Apache ORC dependencies to 1.4.3, released on February 9th. The Apache ORC 1.4.2 release removes unnecessary dependencies, and 1.4.3 adds 5 more patches (https://s.apache.org/Fll8).
In particular, ORC-285, reproduced below, is fixed in 1.4.3.
```scala
scala> val df = Seq(Array.empty[Float]).toDF()
scala> df.write.format("orc").save("/tmp/floatarray")
scala> spark.read.orc("/tmp/floatarray")
res1: org.apache.spark.sql.DataFrame = [value: array<float>]
scala> spark.read.orc("/tmp/floatarray").show()
18/02/12 22:09:10 ERROR Executor: Exception in task 0.0 in stage 1.0 (TID 1)
java.io.IOException: Error reading file: file:/tmp/floatarray/part-00000-9c0b461b-4df1-4c23-aac1-3e4f349ac7d6-c000.snappy.orc
at org.apache.orc.impl.RecordReaderImpl.nextBatch(RecordReaderImpl.java:1191)
at org.apache.orc.mapreduce.OrcMapreduceRecordReader.ensureBatch(OrcMapreduceRecordReader.java:78)
...
Caused by: java.io.EOFException: Read past EOF for compressed stream Stream for column 2 kind DATA position: 0 length: 0 range: 0 offset: 0 limit: 0
```
## How was this patch tested?
Pass the Jenkins tests, including the newly added test cases for empty float/double array columns.
Author: Dongjoon Hyun <do...@apache.org>
Closes #20511 from dongjoon-hyun/SPARK-23340.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ee3b2ae
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ee3b2ae
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ee3b2ae
Branch: refs/heads/master
Commit: 3ee3b2ae1ff8fbeb43a08becef43a9bd763b06bb
Parents: 15ad4a7
Author: Dongjoon Hyun <do...@apache.org>
Authored: Sat Feb 17 00:25:36 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Sat Feb 17 00:25:36 2018 -0800
----------------------------------------------------------------------
dev/deps/spark-deps-hadoop-2.6 | 4 ++--
dev/deps/spark-deps-hadoop-2.7 | 4 ++--
pom.xml | 6 +-----
.../sql/execution/datasources/orc/OrcSourceSuite.scala | 9 +++++++++
.../org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala | 10 ++++++++++
5 files changed, 24 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/3ee3b2ae/dev/deps/spark-deps-hadoop-2.6
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 9903138..ed31050 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -157,8 +157,8 @@ objenesis-2.1.jar
okhttp-3.8.1.jar
okio-1.13.0.jar
opencsv-2.3.jar
-orc-core-1.4.1-nohive.jar
-orc-mapreduce-1.4.1-nohive.jar
+orc-core-1.4.3-nohive.jar
+orc-mapreduce-1.4.3-nohive.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.8.jar
http://git-wip-us.apache.org/repos/asf/spark/blob/3ee3b2ae/dev/deps/spark-deps-hadoop-2.7
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index cf8d278..04dec04 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -158,8 +158,8 @@ objenesis-2.1.jar
okhttp-3.8.1.jar
okio-1.13.0.jar
opencsv-2.3.jar
-orc-core-1.4.1-nohive.jar
-orc-mapreduce-1.4.1-nohive.jar
+orc-core-1.4.3-nohive.jar
+orc-mapreduce-1.4.3-nohive.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.8.jar
http://git-wip-us.apache.org/repos/asf/spark/blob/3ee3b2ae/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index de949b9..ac30107 100644
--- a/pom.xml
+++ b/pom.xml
@@ -130,7 +130,7 @@
<hive.version.short>1.2.1</hive.version.short>
<derby.version>10.12.1.1</derby.version>
<parquet.version>1.8.2</parquet.version>
- <orc.version>1.4.1</orc.version>
+ <orc.version>1.4.3</orc.version>
<orc.classifier>nohive</orc.classifier>
<hive.parquet.version>1.6.0</hive.parquet.version>
<jetty.version>9.3.20.v20170531</jetty.version>
@@ -1740,10 +1740,6 @@
<groupId>org.apache.hive</groupId>
<artifactId>hive-storage-api</artifactId>
</exclusion>
- <exclusion>
- <groupId>io.airlift</groupId>
- <artifactId>slice</artifactId>
- </exclusion>
</exclusions>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/spark/blob/3ee3b2ae/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
index 6f5f2fd..523f7cf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
@@ -160,6 +160,15 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
}
}
}
+
+ test("SPARK-23340 Empty float/double array columns raise EOFException") {
+ Seq(Seq(Array.empty[Float]).toDF(), Seq(Array.empty[Double]).toDF()).foreach { df =>
+ withTempPath { path =>
+ df.write.format("orc").save(path.getCanonicalPath)
+ checkAnswer(spark.read.orc(path.getCanonicalPath), df)
+ }
+ }
+ }
}
class OrcSourceSuite extends OrcSuite with SharedSQLContext {
http://git-wip-us.apache.org/repos/asf/spark/blob/3ee3b2ae/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala
index 92b2f06..597b0f5 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala
@@ -208,4 +208,14 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton {
}
}
}
+
+ test("SPARK-23340 Empty float/double array columns raise EOFException") {
+ withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "false") {
+ withTable("spark_23340") {
+ sql("CREATE TABLE spark_23340(a array<float>, b array<double>) STORED AS ORC")
+ sql("INSERT INTO spark_23340 VALUES (array(), array())")
+ checkAnswer(spark.table("spark_23340"), Seq(Row(Array.empty[Float], Array.empty[Double])))
+ }
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org