You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/11/12 22:17:10 UTC

spark git commit: [SPARK-22464][BACKPORT-2.2][SQL] No pushdown for Hive metastore partition predicates containing null-safe equality

Repository: spark
Updated Branches:
  refs/heads/branch-2.2 00cb9d0b6 -> 95981faaa


[SPARK-22464][BACKPORT-2.2][SQL] No pushdown for Hive metastore partition predicates containing null-safe equality

## What changes were proposed in this pull request?
`<=>` (null-safe equality) is not supported by Hive metastore partition predicate pushdown. We should not push it down to the Hive metastore when it is used in partition predicates.

## How was this patch tested?
Added a test case (`null-safe equals`) to `FiltersSuite`.

Author: gatorsmile <ga...@gmail.com>

Closes #19724 from gatorsmile/backportSPARK-22464.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/95981faa
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/95981faa
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/95981faa

Branch: refs/heads/branch-2.2
Commit: 95981faaa67e4627443b862eecd391f2686cf7c5
Parents: 00cb9d0
Author: gatorsmile <ga...@gmail.com>
Authored: Sun Nov 12 23:17:06 2017 +0100
Committer: Wenchen Fan <we...@databricks.com>
Committed: Sun Nov 12 23:17:06 2017 +0100

----------------------------------------------------------------------
 .../apache/spark/sql/hive/client/HiveShim.scala | 20 ++++++++++++++++----
 .../spark/sql/hive/client/FiltersSuite.scala    |  4 ++++
 2 files changed, 20 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/95981faa/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
index 7abb9f0..056ffc6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
@@ -577,6 +577,18 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
   }
 
   /**
+   * An extractor that matches all binary comparison operators except null-safe equality.
+   *
+   * Null-safe equality is not supported by Hive metastore partition predicate pushdown
+   */
+  object SpecialBinaryComparison {
+    def unapply(e: BinaryComparison): Option[(Expression, Expression)] = e match {
+      case _: EqualNullSafe => None
+      case _ => Some((e.left, e.right))
+    }
+  }
+
+  /**
    * Converts catalyst expression to the format that Hive's getPartitionsByFilter() expects, i.e.
    * a string that represents partition predicates like "str_key=\"value\" and int_key=1 ...".
    *
@@ -590,14 +602,14 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
       .map(col => col.getName).toSet
 
     filters.collect {
-      case op @ BinaryComparison(a: Attribute, Literal(v, _: IntegralType)) =>
+      case op @ SpecialBinaryComparison(a: Attribute, Literal(v, _: IntegralType)) =>
         s"${a.name} ${op.symbol} $v"
-      case op @ BinaryComparison(Literal(v, _: IntegralType), a: Attribute) =>
+      case op @ SpecialBinaryComparison(Literal(v, _: IntegralType), a: Attribute) =>
         s"$v ${op.symbol} ${a.name}"
-      case op @ BinaryComparison(a: Attribute, Literal(v, _: StringType))
+      case op @ SpecialBinaryComparison(a: Attribute, Literal(v, _: StringType))
           if !varcharKeys.contains(a.name) =>
         s"""${a.name} ${op.symbol} ${quoteStringLiteral(v.toString)}"""
-      case op @ BinaryComparison(Literal(v, _: StringType), a: Attribute)
+      case op @ SpecialBinaryComparison(Literal(v, _: StringType), a: Attribute)
           if !varcharKeys.contains(a.name) =>
         s"""${quoteStringLiteral(v.toString)} ${op.symbol} ${a.name}"""
     }.mkString(" and ")

http://git-wip-us.apache.org/repos/asf/spark/blob/95981faa/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala
index 031c1a5..9bc832a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala
@@ -65,6 +65,10 @@ class FiltersSuite extends SparkFunSuite with Logging {
     (Literal("") === a("varchar", StringType)) :: Nil,
     "")
 
+  filterTest("null-safe equals",
+    (Literal("test") <=> a("stringcol", StringType)) :: Nil,
+    "")
+
   filterTest("SPARK-19912 String literals should be escaped for Hive metastore partition pruning",
     (a("stringcol", StringType) === Literal("p1\" and q=\"q1")) ::
       (Literal("p2\" and q=\"q2") === a("stringcol", StringType)) :: Nil,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org