You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/11/07 20:57:49 UTC
spark git commit: [SPARK-22464][SQL] No pushdown for Hive metastore
partition predicates containing null-safe equality
Repository: spark
Updated Branches:
refs/heads/master 1d341042d -> 0846a4473
[SPARK-22464][SQL] No pushdown for Hive metastore partition predicates containing null-safe equality
## What changes were proposed in this pull request?
`<=>` is not supported by Hive metastore partition predicate pushdown. We should not push it down to the Hive metastore when it is used in partition predicates.
## How was this patch tested?
Added a test case
Author: gatorsmile <ga...@gmail.com>
Closes #19682 from gatorsmile/fixLimitPushDown.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0846a447
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0846a447
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0846a447
Branch: refs/heads/master
Commit: 0846a44736d9a71ba3234ad5de4c8de9e7fe9f6c
Parents: 1d34104
Author: gatorsmile <ga...@gmail.com>
Authored: Tue Nov 7 21:57:43 2017 +0100
Committer: Wenchen Fan <we...@databricks.com>
Committed: Tue Nov 7 21:57:43 2017 +0100
----------------------------------------------------------------------
.../apache/spark/sql/hive/client/HiveShim.scala | 29 ++++++++++++++------
.../spark/sql/hive/client/HiveClientSuite.scala | 9 ++++++
2 files changed, 30 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/0846a447/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
index 5c1ff2b..bd1b300 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
@@ -592,6 +592,19 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
}
}
+
+ /**
+ * An extractor that matches all binary comparison operators except null-safe equality.
+ *
+ * Null-safe equality is not supported by Hive metastore partition predicate pushdown
+ */
+ object SpecialBinaryComparison {
+ def unapply(e: BinaryComparison): Option[(Expression, Expression)] = e match {
+ case _: EqualNullSafe => None
+ case _ => Some((e.left, e.right))
+ }
+ }
+
private def convertBasicFilters(table: Table, filters: Seq[Expression]): String = {
// hive varchar is treated as catalyst string, but hive varchar can't be pushed down.
lazy val varcharKeys = table.getPartitionKeys.asScala
@@ -600,14 +613,14 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
.map(col => col.getName).toSet
filters.collect {
- case op @ BinaryComparison(a: Attribute, Literal(v, _: IntegralType)) =>
+ case op @ SpecialBinaryComparison(a: Attribute, Literal(v, _: IntegralType)) =>
s"${a.name} ${op.symbol} $v"
- case op @ BinaryComparison(Literal(v, _: IntegralType), a: Attribute) =>
+ case op @ SpecialBinaryComparison(Literal(v, _: IntegralType), a: Attribute) =>
s"$v ${op.symbol} ${a.name}"
- case op @ BinaryComparison(a: Attribute, Literal(v, _: StringType))
+ case op @ SpecialBinaryComparison(a: Attribute, Literal(v, _: StringType))
if !varcharKeys.contains(a.name) =>
s"""${a.name} ${op.symbol} ${quoteStringLiteral(v.toString)}"""
- case op @ BinaryComparison(Literal(v, _: StringType), a: Attribute)
+ case op @ SpecialBinaryComparison(Literal(v, _: StringType), a: Attribute)
if !varcharKeys.contains(a.name) =>
s"""${quoteStringLiteral(v.toString)} ${op.symbol} ${a.name}"""
}.mkString(" and ")
@@ -666,16 +679,16 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
case InSet(a: Attribute, ExtractableValues(values))
if !varcharKeys.contains(a.name) && values.nonEmpty =>
convertInToOr(a, values)
- case op @ BinaryComparison(a: Attribute, ExtractableLiteral(value))
+ case op @ SpecialBinaryComparison(a: Attribute, ExtractableLiteral(value))
if !varcharKeys.contains(a.name) =>
s"${a.name} ${op.symbol} $value"
- case op @ BinaryComparison(ExtractableLiteral(value), a: Attribute)
+ case op @ SpecialBinaryComparison(ExtractableLiteral(value), a: Attribute)
if !varcharKeys.contains(a.name) =>
s"$value ${op.symbol} ${a.name}"
- case op @ And(expr1, expr2)
+ case And(expr1, expr2)
if convert.isDefinedAt(expr1) || convert.isDefinedAt(expr2) =>
(convert.lift(expr1) ++ convert.lift(expr2)).mkString("(", " and ", ")")
- case op @ Or(expr1, expr2)
+ case Or(expr1, expr2)
if convert.isDefinedAt(expr1) && convert.isDefinedAt(expr2) =>
s"(${convert(expr1)} or ${convert(expr2)})"
}
http://git-wip-us.apache.org/repos/asf/spark/blob/0846a447/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
index 3eedcf7..ce53ace 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
@@ -78,6 +78,15 @@ class HiveClientSuite(version: String)
assert(filteredPartitions.size == testPartitionCount)
}
+ test("getPartitionsByFilter: ds<=>20170101") {
+ // Should return all partitions where <=> is not supported
+ testMetastorePartitionFiltering(
+ "ds<=>20170101",
+ 20170101 to 20170103,
+ 0 to 23,
+ "aa" :: "ab" :: "ba" :: "bb" :: Nil)
+ }
+
test("getPartitionsByFilter: ds=20170101") {
testMetastorePartitionFiltering(
"ds=20170101",
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org