Posted to commits@spark.apache.org by do...@apache.org on 2019/02/02 17:18:28 UTC

[spark] branch branch-2.4 updated: [SPARK-26677][BUILD] Update Parquet to 1.10.1 with notEq pushdown fix.

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 190e48c  [SPARK-26677][BUILD] Update Parquet to 1.10.1 with notEq pushdown fix.
190e48c is described below

commit 190e48c1aa68158f3477f00dfd11c541539b3174
Author: Ryan Blue <bl...@apache.org>
AuthorDate: Sat Feb 2 09:17:52 2019 -0800

    [SPARK-26677][BUILD] Update Parquet to 1.10.1 with notEq pushdown fix.
    
    ## What changes were proposed in this pull request?
    
    Update to Parquet Java 1.10.1.
    
    ## How was this patch tested?
    
    Added a test from HyukjinKwon that validates the notEq case from SPARK-26677.
    
    Closes #23704 from rdblue/SPARK-26677-fix-noteq-parquet-bug.
    
    Lead-authored-by: Ryan Blue <bl...@apache.org>
    Co-authored-by: Hyukjin Kwon <gu...@apache.org>
    Co-authored-by: Ryan Blue <rd...@users.noreply.github.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
    (cherry picked from commit f72d2177882dc47b043fdc7dec9a46fe65df4ee9)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 dev/deps/spark-deps-hadoop-2.7                            | 10 +++++-----
 dev/deps/spark-deps-hadoop-3.1                            | 10 +++++-----
 pom.xml                                                   |  2 +-
 .../execution/datasources/parquet/ParquetQuerySuite.scala | 15 +++++++++++++++
 4 files changed, 26 insertions(+), 11 deletions(-)
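
For context, the notEq pushdown issue fixed here shows up when a negated null-safe equality predicate is pushed down to Parquet's dictionary filter: with Parquet 1.10.0, a row group whose dictionary contains only the compared value can be dropped even though its null rows satisfy the predicate, so query results silently lose rows. A minimal spark-shell sketch of the scenario the new test below covers (the path and the reproduction shape are illustrative, not part of this commit):

    import spark.implicits._

    // Illustrative path; repeated "A" values force dictionary encoding,
    // and repartition(1) keeps all rows in a single row group.
    val path = "/tmp/spark-26677-repro"
    Seq(Some("A"), Some("A"), None).toDF("value")
      .repartition(1)
      .write.mode("overwrite").parquet(path)

    val df = spark.read.parquet(path)
    // With Parquet 1.10.0 the pushed-down filter could drop the whole row
    // group, losing the null row; with 1.10.1 the null row is returned.
    df.where("NOT (value <=> 'A')").show()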

diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 365a59b..4a6ad3f 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -160,13 +160,13 @@ orc-shims-1.5.4.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
 paranamer-2.8.jar
-parquet-column-1.10.0.jar
-parquet-common-1.10.0.jar
-parquet-encoding-1.10.0.jar
+parquet-column-1.10.1.jar
+parquet-common-1.10.1.jar
+parquet-encoding-1.10.1.jar
 parquet-format-2.4.0.jar
-parquet-hadoop-1.10.0.jar
+parquet-hadoop-1.10.1.jar
 parquet-hadoop-bundle-1.6.0.jar
-parquet-jackson-1.10.0.jar
+parquet-jackson-1.10.1.jar
 protobuf-java-2.5.0.jar
 py4j-0.10.7.jar
 pyrolite-4.13.jar
diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1
index 524e370..83e243b 100644
--- a/dev/deps/spark-deps-hadoop-3.1
+++ b/dev/deps/spark-deps-hadoop-3.1
@@ -178,13 +178,13 @@ orc-shims-1.5.4.jar
 oro-2.0.8.jar
 osgi-resource-locator-1.0.1.jar
 paranamer-2.8.jar
-parquet-column-1.10.0.jar
-parquet-common-1.10.0.jar
-parquet-encoding-1.10.0.jar
+parquet-column-1.10.1.jar
+parquet-common-1.10.1.jar
+parquet-encoding-1.10.1.jar
 parquet-format-2.4.0.jar
-parquet-hadoop-1.10.0.jar
+parquet-hadoop-1.10.1.jar
 parquet-hadoop-bundle-1.6.0.jar
-parquet-jackson-1.10.0.jar
+parquet-jackson-1.10.1.jar
 protobuf-java-2.5.0.jar
 py4j-0.10.7.jar
 pyrolite-4.13.jar
diff --git a/pom.xml b/pom.xml
index 91be9d9..a5a52cd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -130,7 +130,7 @@
     <!-- Version used for internal directory structure -->
     <hive.version.short>1.2.1</hive.version.short>
     <derby.version>10.12.1.1</derby.version>
-    <parquet.version>1.10.0</parquet.version>
+    <parquet.version>1.10.1</parquet.version>
     <orc.version>1.5.4</orc.version>
     <orc.classifier>nohive</orc.classifier>
     <hive.parquet.version>1.6.0</hive.parquet.version>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
index 54c77dd..7f8357c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
@@ -891,6 +891,21 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext
       }
     }
   }
+
+  test("SPARK-26677: negated null-safe equality comparison should not filter matched row groups") {
+    (true :: false :: Nil).foreach { vectorized =>
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) {
+        withTempPath { path =>
+          // Repeated values for dictionary encoding.
+          Seq(Some("A"), Some("A"), None).toDF.repartition(1)
+            .write.parquet(path.getAbsolutePath)
+          val df = spark.read.parquet(path.getAbsolutePath)
+          checkAnswer(stripSparkFilter(df.where("NOT (value <=> 'A')")), df)
+        }
+      }
+    }
+  }
+
 }
 
 object TestingUDT {
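
A note on reading the new test: <=> is Spark SQL's null-safe equality, so NOT (value <=> 'A') is true for the row where value is null, and stripSparkFilter removes Spark's own Filter operator so the assertion exercises only the Parquet-side row-group filtering; checkAnswer then expects the full DataFrame back, i.e. no row group was dropped. A small sketch of the null-safe semantics the test relies on (spark-shell style, not part of the commit):

    // NULL <=> 'A' evaluates to false, so its negation is true;
    // plain NULL <> 'A' evaluates to NULL and would filter the row out.
    spark.sql("SELECT NOT (CAST(NULL AS STRING) <=> 'A')").show()  // true
    spark.sql("SELECT CAST(NULL AS STRING) <> 'A'").show()         // NULL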


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org