You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/04/19 16:48:39 UTC
spark git commit: [SPARK-23340][SQL][BRANCH-2.3] Upgrade Apache ORC to 1.4.3
Repository: spark
Updated Branches:
refs/heads/branch-2.3 fb968215c -> be184d16e
[SPARK-23340][SQL][BRANCH-2.3] Upgrade Apache ORC to 1.4.3
## What changes were proposed in this pull request?
This PR updates the Apache ORC dependencies to 1.4.3, which was released on February 9th. The Apache ORC 1.4.2 release removed unnecessary dependencies, and 1.4.3 adds 5 more patches (https://s.apache.org/Fll8).
In particular, ORC-285 is fixed in 1.4.3. Before the fix, reading back an empty float array column failed as shown below.
```scala
scala> val df = Seq(Array.empty[Float]).toDF()
scala> df.write.format("orc").save("/tmp/floatarray")
scala> spark.read.orc("/tmp/floatarray")
res1: org.apache.spark.sql.DataFrame = [value: array<float>]
scala> spark.read.orc("/tmp/floatarray").show()
18/02/12 22:09:10 ERROR Executor: Exception in task 0.0 in stage 1.0 (TID 1)
java.io.IOException: Error reading file: file:/tmp/floatarray/part-00000-9c0b461b-4df1-4c23-aac1-3e4f349ac7d6-c000.snappy.orc
at org.apache.orc.impl.RecordReaderImpl.nextBatch(RecordReaderImpl.java:1191)
at org.apache.orc.mapreduce.OrcMapreduceRecordReader.ensureBatch(OrcMapreduceRecordReader.java:78)
...
Caused by: java.io.EOFException: Read past EOF for compressed stream Stream for column 2 kind DATA position: 0 length: 0 range: 0 offset: 0 limit: 0
```
## How was this patch tested?
Pass the Jenkins tests.
Author: Dongjoon Hyun <do...@apache.org>
Closes #21093 from dongjoon-hyun/SPARK-23340-2.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/be184d16
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/be184d16
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/be184d16
Branch: refs/heads/branch-2.3
Commit: be184d16e86f96a748d6bf1642c1c319d2a09f5c
Parents: fb96821
Author: Dongjoon Hyun <do...@apache.org>
Authored: Thu Apr 19 09:48:34 2018 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Thu Apr 19 09:48:34 2018 -0700
----------------------------------------------------------------------
dev/deps/spark-deps-hadoop-2.6 | 4 ++--
dev/deps/spark-deps-hadoop-2.7 | 4 ++--
pom.xml | 6 +-----
.../sql/execution/datasources/orc/OrcSourceSuite.scala | 9 +++++++++
.../org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala | 10 ++++++++++
5 files changed, 24 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/be184d16/dev/deps/spark-deps-hadoop-2.6
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 3b2a885..3ded7ae 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -156,8 +156,8 @@ objenesis-2.1.jar
okhttp-3.8.1.jar
okio-1.13.0.jar
opencsv-2.3.jar
-orc-core-1.4.1-nohive.jar
-orc-mapreduce-1.4.1-nohive.jar
+orc-core-1.4.3-nohive.jar
+orc-mapreduce-1.4.3-nohive.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.8.jar
http://git-wip-us.apache.org/repos/asf/spark/blob/be184d16/dev/deps/spark-deps-hadoop-2.7
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 757da00..344f22d 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -157,8 +157,8 @@ objenesis-2.1.jar
okhttp-3.8.1.jar
okio-1.13.0.jar
opencsv-2.3.jar
-orc-core-1.4.1-nohive.jar
-orc-mapreduce-1.4.1-nohive.jar
+orc-core-1.4.3-nohive.jar
+orc-mapreduce-1.4.3-nohive.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.8.jar
http://git-wip-us.apache.org/repos/asf/spark/blob/be184d16/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 651ce71..9c2d931 100644
--- a/pom.xml
+++ b/pom.xml
@@ -130,7 +130,7 @@
<hive.version.short>1.2.1</hive.version.short>
<derby.version>10.12.1.1</derby.version>
<parquet.version>1.8.2</parquet.version>
- <orc.version>1.4.1</orc.version>
+ <orc.version>1.4.3</orc.version>
<orc.classifier>nohive</orc.classifier>
<hive.parquet.version>1.6.0</hive.parquet.version>
<jetty.version>9.3.20.v20170531</jetty.version>
@@ -1739,10 +1739,6 @@
<groupId>org.apache.hive</groupId>
<artifactId>hive-storage-api</artifactId>
</exclusion>
- <exclusion>
- <groupId>io.airlift</groupId>
- <artifactId>slice</artifactId>
- </exclusion>
</exclusions>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/spark/blob/be184d16/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
index 6f5f2fd..523f7cf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
@@ -160,6 +160,15 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
}
}
}
+
+ test("SPARK-23340 Empty float/double array columns raise EOFException") {
+ Seq(Seq(Array.empty[Float]).toDF(), Seq(Array.empty[Double]).toDF()).foreach { df =>
+ withTempPath { path =>
+ df.write.format("orc").save(path.getCanonicalPath)
+ checkAnswer(spark.read.orc(path.getCanonicalPath), df)
+ }
+ }
+ }
}
class OrcSourceSuite extends OrcSuite with SharedSQLContext {
http://git-wip-us.apache.org/repos/asf/spark/blob/be184d16/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala
index 92b2f06..597b0f5 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala
@@ -208,4 +208,14 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton {
}
}
}
+
+ test("SPARK-23340 Empty float/double array columns raise EOFException") {
+ withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "false") {
+ withTable("spark_23340") {
+ sql("CREATE TABLE spark_23340(a array<float>, b array<double>) STORED AS ORC")
+ sql("INSERT INTO spark_23340 VALUES (array(), array())")
+ checkAnswer(spark.table("spark_23340"), Seq(Row(Array.empty[Float], Array.empty[Double])))
+ }
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org