You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2014/10/09 02:03:49 UTC

git commit: [SPARK-3831] [SQL] Filter rule Improvement and bool expression optimization.

Repository: spark
Updated Branches:
  refs/heads/master add174aa5 -> a85f24acc


[SPARK-3831] [SQL] Filter rule Improvement and bool expression optimization.

If we write the filter which is always FALSE like

    SELECT * from person WHERE FALSE;

200 tasks will run. I think, 1 task is enough.

And current optimizer cannot optimize the case NOT is duplicated like

    SELECT * from person WHERE NOT ( NOT (age > 30));

The filter rule above should be simplified

Author: Kousuke Saruta <sa...@oss.nttdata.co.jp>

Closes #2692 from sarutak/SPARK-3831 and squashes the following commits:

25f3e20 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-3831
23c750c [Kousuke Saruta] Improved unsupported predicate test case
a11b9f3 [Kousuke Saruta] Modified NOT predicate test case in PartitionBatchPruningSuite
8ea872b [Kousuke Saruta] Fixed the number of tasks when the data of  LocalRelation is empty.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a85f24ac
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a85f24ac
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a85f24ac

Branch: refs/heads/master
Commit: a85f24accd3266e0f97ee04d03c22b593d99c062
Parents: add174a
Author: Kousuke Saruta <sa...@oss.nttdata.co.jp>
Authored: Wed Oct 8 17:03:47 2014 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Wed Oct 8 17:03:47 2014 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 12 ++++++++++++
 .../apache/spark/sql/execution/SparkStrategies.scala    |  3 ++-
 .../spark/sql/columnar/PartitionBatchPruningSuite.scala |  3 ++-
 3 files changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a85f24ac/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index a4133fe..636d0b9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -299,6 +299,18 @@ object BooleanSimplification extends Rule[LogicalPlan] {
           case (_, _) => or
         }
 
+      case not @ Not(exp) =>
+        exp match {
+          case Literal(true, BooleanType) => Literal(false)
+          case Literal(false, BooleanType) => Literal(true)
+          case GreaterThan(l, r) => LessThanOrEqual(l, r)
+          case GreaterThanOrEqual(l, r) => LessThan(l, r)
+          case LessThan(l, r) => GreaterThanOrEqual(l, r)
+          case LessThanOrEqual(l, r) => GreaterThan(l, r)
+          case Not(e) => e
+          case _ => not
+        }
+
       // Turn "if (true) a else b" into "a", and if (false) a else b" into "b".
       case e @ If(Literal(v, _), trueValue, falseValue) => if (v == true) trueValue else falseValue
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/a85f24ac/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index 5c16d0c..883f2ff 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -274,9 +274,10 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
         execution.Sample(fraction, withReplacement, seed, planLater(child)) :: Nil
       case SparkLogicalPlan(alreadyPlanned) => alreadyPlanned :: Nil
       case logical.LocalRelation(output, data) =>
+        val nPartitions = if (data.isEmpty) 1 else numPartitions
         PhysicalRDD(
           output,
-          RDDConversions.productToRowRdd(sparkContext.parallelize(data, numPartitions))) :: Nil
+          RDDConversions.productToRowRdd(sparkContext.parallelize(data, nPartitions))) :: Nil
       case logical.Limit(IntegerLiteral(limit), child) =>
         execution.Limit(limit, planLater(child)) :: Nil
       case Unions(unionChildren) =>

http://git-wip-us.apache.org/repos/asf/spark/blob/a85f24ac/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala
index 69e0adb..f53acc8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala
@@ -67,10 +67,11 @@ class PartitionBatchPruningSuite extends FunSuite with BeforeAndAfterAll with Be
   checkBatchPruning("i > 8 AND i <= 21", 9 to 21, 2, 3)
   checkBatchPruning("i < 2 OR i > 99", Seq(1, 100), 2, 2)
   checkBatchPruning("i < 2 OR (i > 78 AND i < 92)", Seq(1) ++ (79 to 91), 3, 4)
+  checkBatchPruning("NOT (i < 88)", 88 to 100, 1, 2)
 
   // With unsupported predicate
   checkBatchPruning("i < 12 AND i IS NOT NULL", 1 to 11, 1, 2)
-  checkBatchPruning("NOT (i < 88)", 88 to 100, 5, 10)
+  checkBatchPruning(s"NOT (i in (${(1 to 30).mkString(",")}))", 31 to 100, 5, 10)
 
   def checkBatchPruning(
       filter: String,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org