You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/10/24 16:11:56 UTC
spark git commit: [SPARK-22301][SQL] Add rule to Optimizer for In
with not-nullable value and empty list
Repository: spark
Updated Branches:
refs/heads/master 8beeaed66 -> 3f5ba968c
[SPARK-22301][SQL] Add rule to Optimizer for In with not-nullable value and empty list
## What changes were proposed in this pull request?
For performance reasons, we should resolve an `IN` operation on an empty list to false in the optimization phase when the value is not nullable, as discussed in #19522.
## How was this patch tested?
Added UT
cc gatorsmile
Author: Marco Gaido <ma...@gmail.com>
Author: Marco Gaido <mg...@hortonworks.com>
Closes #19523 from mgaido91/SPARK-22301.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3f5ba968
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3f5ba968
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3f5ba968
Branch: refs/heads/master
Commit: 3f5ba968c5af7911a2f6c452500b6a629a3de8db
Parents: 8beeaed
Author: Marco Gaido <ma...@gmail.com>
Authored: Tue Oct 24 09:11:52 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Tue Oct 24 09:11:52 2017 -0700
----------------------------------------------------------------------
.../spark/sql/catalyst/optimizer/expressions.scala | 7 +++++--
.../sql/catalyst/optimizer/OptimizeInSuite.scala | 16 ++++++++++++++++
2 files changed, 21 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/3f5ba968/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
index 273bc6c..523b53b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
@@ -169,13 +169,16 @@ object ReorderAssociativeOperator extends Rule[LogicalPlan] {
/**
* Optimize IN predicates:
- * 1. Removes literal repetitions.
- * 2. Replaces [[In (value, seq[Literal])]] with optimized version
+ * 1. Converts the predicate to false when the list is empty and
+ * the value is not nullable.
+ * 2. Removes literal repetitions.
+ * 3. Replaces [[In (value, seq[Literal])]] with optimized version
* [[InSet (value, HashSet[Literal])]] which is much faster.
*/
object OptimizeIn extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
case q: LogicalPlan => q transformExpressionsDown {
+ case In(v, list) if list.isEmpty && !v.nullable => FalseLiteral
case expr @ In(v, list) if expr.inSetConvertible =>
val newList = ExpressionSet(list).toSeq
if (newList.size > SQLConf.get.optimizerInSetConversionThreshold) {
http://git-wip-us.apache.org/repos/asf/spark/blob/3f5ba968/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
index eaad1e3..d7acd13 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
@@ -175,4 +175,20 @@ class OptimizeInSuite extends PlanTest {
}
}
}
+
+ test("OptimizedIn test: In empty list gets transformed to FalseLiteral " +
+ "when value is not nullable") {
+ val originalQuery =
+ testRelation
+ .where(In(Literal("a"), Nil))
+ .analyze
+
+ val optimized = Optimize.execute(originalQuery)
+ val correctAnswer =
+ testRelation
+ .where(Literal(false))
+ .analyze
+
+ comparePlans(optimized, correctAnswer)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org