You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yh...@apache.org on 2015/10/31 20:55:39 UTC
spark git commit: [SPARK-11024][SQL] Optimize NULL in <inlist-expressions>
by folding it to Literal(null)
Repository: spark
Updated Branches:
refs/heads/master ac4118db2 -> fc27dfbf0
[SPARK-11024][SQL] Optimize NULL in <inlist-expressions> by folding it to Literal(null)
Add a rule to the optimizer to convert NULL [NOT] IN (expr1,...,exprN) to
Literal(null).
This is a follow-up defect to SPARK-8654.
cloud-fan, can you please take a look?
Author: Dilip Biswal <db...@us.ibm.com>
Closes #9348 from dilipbiswal/spark_11024.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc27dfbf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc27dfbf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc27dfbf
Branch: refs/heads/master
Commit: fc27dfbf0f8d3f96c70e27d88f7d0316c97ddb1e
Parents: ac4118d
Author: Dilip Biswal <db...@us.ibm.com>
Authored: Sat Oct 31 12:55:33 2015 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Sat Oct 31 12:55:33 2015 -0700
----------------------------------------------------------------------
.../sql/catalyst/optimizer/Optimizer.scala | 5 ++
.../catalyst/optimizer/OptimizeInSuite.scala | 51 +++++++++++++++++++-
2 files changed, 55 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/fc27dfbf/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index d37f438..338c519 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -417,6 +417,11 @@ object NullPropagation extends Rule[LogicalPlan] {
case left :: Literal(null, _) :: Nil => Literal.create(null, e.dataType)
case _ => e
}
+
+ // If the value expression is NULL then transform the In expression to
+ // Literal(null)
+ case In(Literal(null, _), list) => Literal.create(null, BooleanType)
+
}
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/fc27dfbf/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
index 6f7b5b9..48cab01 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala
@@ -35,7 +35,8 @@ class OptimizeInSuite extends PlanTest {
val batches =
Batch("AnalysisNodes", Once,
EliminateSubQueries) ::
- Batch("ConstantFolding", Once,
+ Batch("ConstantFolding", FixedPoint(10),
+ NullPropagation,
ConstantFolding,
BooleanSimplification,
OptimizeIn) :: Nil
@@ -82,4 +83,52 @@ class OptimizeInSuite extends PlanTest {
comparePlans(optimized, correctAnswer)
}
+
+ test("OptimizedIn test: NULL IN (expr1, ..., exprN) gets transformed to Filter(null)") {
+ val originalQuery =
+ testRelation
+ .where(In(Literal.create(null, NullType), Seq(Literal(1), Literal(2))))
+ .analyze
+
+ val optimized = Optimize.execute(originalQuery.analyze)
+ val correctAnswer =
+ testRelation
+ .where(Literal.create(null, BooleanType))
+ .analyze
+
+ comparePlans(optimized, correctAnswer)
+ }
+
+ test("OptimizedIn test: Inset optimization disabled as " +
+ "list expression contains attribute)") {
+ val originalQuery =
+ testRelation
+ .where(In(Literal.create(null, StringType), Seq(Literal(1), UnresolvedAttribute("b"))))
+ .analyze
+
+ val optimized = Optimize.execute(originalQuery.analyze)
+ val correctAnswer =
+ testRelation
+ .where(Literal.create(null, BooleanType))
+ .analyze
+
+ comparePlans(optimized, correctAnswer)
+ }
+
+ test("OptimizedIn test: Inset optimization disabled as " +
+ "list expression contains attribute - select)") {
+ val originalQuery =
+ testRelation
+ .select(In(Literal.create(null, StringType),
+ Seq(Literal(1), UnresolvedAttribute("b"))).as("a")).analyze
+
+ val optimized = Optimize.execute(originalQuery.analyze)
+ val correctAnswer =
+ testRelation
+ .select(Literal.create(null, BooleanType).as("a"))
+ .analyze
+
+ comparePlans(optimized, correctAnswer)
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org