You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2019/02/02 17:18:28 UTC
[spark] branch branch-2.4 updated: [SPARK-26677][BUILD] Update Parquet to 1.10.1 with notEq pushdown fix.
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new 190e48c [SPARK-26677][BUILD] Update Parquet to 1.10.1 with notEq pushdown fix.
190e48c is described below
commit 190e48c1aa68158f3477f00dfd11c541539b3174
Author: Ryan Blue <bl...@apache.org>
AuthorDate: Sat Feb 2 09:17:52 2019 -0800
[SPARK-26677][BUILD] Update Parquet to 1.10.1 with notEq pushdown fix.
## What changes were proposed in this pull request?
Update to Parquet Java 1.10.1.
## How was this patch tested?
Added a test from HyukjinKwon that validates the notEq case from SPARK-26677.
Closes #23704 from rdblue/SPARK-26677-fix-noteq-parquet-bug.
Lead-authored-by: Ryan Blue <bl...@apache.org>
Co-authored-by: Hyukjin Kwon <gu...@apache.org>
Co-authored-by: Ryan Blue <rd...@users.noreply.github.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
(cherry picked from commit f72d2177882dc47b043fdc7dec9a46fe65df4ee9)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
dev/deps/spark-deps-hadoop-2.7 | 10 +++++-----
dev/deps/spark-deps-hadoop-3.1 | 10 +++++-----
pom.xml | 2 +-
.../execution/datasources/parquet/ParquetQuerySuite.scala | 15 +++++++++++++++
4 files changed, 26 insertions(+), 11 deletions(-)
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 365a59b..4a6ad3f 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -160,13 +160,13 @@ orc-shims-1.5.4.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.8.jar
-parquet-column-1.10.0.jar
-parquet-common-1.10.0.jar
-parquet-encoding-1.10.0.jar
+parquet-column-1.10.1.jar
+parquet-common-1.10.1.jar
+parquet-encoding-1.10.1.jar
parquet-format-2.4.0.jar
-parquet-hadoop-1.10.0.jar
+parquet-hadoop-1.10.1.jar
parquet-hadoop-bundle-1.6.0.jar
-parquet-jackson-1.10.0.jar
+parquet-jackson-1.10.1.jar
protobuf-java-2.5.0.jar
py4j-0.10.7.jar
pyrolite-4.13.jar
diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1
index 524e370..83e243b 100644
--- a/dev/deps/spark-deps-hadoop-3.1
+++ b/dev/deps/spark-deps-hadoop-3.1
@@ -178,13 +178,13 @@ orc-shims-1.5.4.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.8.jar
-parquet-column-1.10.0.jar
-parquet-common-1.10.0.jar
-parquet-encoding-1.10.0.jar
+parquet-column-1.10.1.jar
+parquet-common-1.10.1.jar
+parquet-encoding-1.10.1.jar
parquet-format-2.4.0.jar
-parquet-hadoop-1.10.0.jar
+parquet-hadoop-1.10.1.jar
parquet-hadoop-bundle-1.6.0.jar
-parquet-jackson-1.10.0.jar
+parquet-jackson-1.10.1.jar
protobuf-java-2.5.0.jar
py4j-0.10.7.jar
pyrolite-4.13.jar
diff --git a/pom.xml b/pom.xml
index 91be9d9..a5a52cd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -130,7 +130,7 @@
<!-- Version used for internal directory structure -->
<hive.version.short>1.2.1</hive.version.short>
<derby.version>10.12.1.1</derby.version>
- <parquet.version>1.10.0</parquet.version>
+ <parquet.version>1.10.1</parquet.version>
<orc.version>1.5.4</orc.version>
<orc.classifier>nohive</orc.classifier>
<hive.parquet.version>1.6.0</hive.parquet.version>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
index 54c77dd..7f8357c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
@@ -891,6 +891,21 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext
}
}
}
+
+ test("SPARK-26677: negated null-safe equality comparison should not filter matched row groups") {
+ (true :: false :: Nil).foreach { vectorized =>
+ withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) {
+ withTempPath { path =>
+ // Repeated values for dictionary encoding.
+ Seq(Some("A"), Some("A"), None).toDF.repartition(1)
+ .write.parquet(path.getAbsolutePath)
+ val df = spark.read.parquet(path.getAbsolutePath)
+ checkAnswer(stripSparkFilter(df.where("NOT (value <=> 'A')")), df)
+ }
+ }
+ }
+ }
+
}
object TestingUDT {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org