You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2016/03/21 03:35:31 UTC

spark git commit: [SPARK-14019][SQL] Remove noop SortOrder in Sort

Repository: spark
Updated Branches:
  refs/heads/master 454a00df2 -> f58319a24


[SPARK-14019][SQL] Remove noop SortOrder in Sort

#### What changes were proposed in this pull request?

This PR is to add a new Optimizer rule for pruning Sort if its SortOrder is no-op. In the phase of **Optimizer**, if a specific `SortOrder` does not have any reference, it has no effect on the sorting results. If `Sort` is empty, remove the whole `Sort`.

For example, in the following SQL query
```SQL
SELECT * FROM t ORDER BY NULL + 5
```

Before the fix, the plan is like
```
== Analyzed Logical Plan ==
a: int, b: int
Sort [(cast(null as int) + 5) ASC], true
+- Project [a#92,b#93]
   +- SubqueryAlias t
      +- Project [_1#89 AS a#92,_2#90 AS b#93]
         +- LocalRelation [_1#89,_2#90], [[1,2],[1,2]]

== Optimized Logical Plan ==
Sort [null ASC], true
+- LocalRelation [a#92,b#93], [[1,2],[1,2]]

== Physical Plan ==
WholeStageCodegen
:  +- Sort [null ASC], true, 0
:     +- INPUT
+- Exchange rangepartitioning(null ASC, 5), None
   +- LocalTableScan [a#92,b#93], [[1,2],[1,2]]
```

After the fix, the plan is like
```
== Analyzed Logical Plan ==
a: int, b: int
Sort [(cast(null as int) + 5) ASC], true
+- Project [a#92,b#93]
   +- SubqueryAlias t
      +- Project [_1#89 AS a#92,_2#90 AS b#93]
         +- LocalRelation [_1#89,_2#90], [[1,2],[1,2]]

== Optimized Logical Plan ==
LocalRelation [a#92,b#93], [[1,2],[1,2]]

== Physical Plan ==
LocalTableScan [a#92,b#93], [[1,2],[1,2]]
```

cc rxin cloud-fan marmbrus Thanks!

#### How was this patch tested?
Added a test suite for covering this rule

Author: gatorsmile <ga...@gmail.com>

Closes #11840 from gatorsmile/sortElimination.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f58319a2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f58319a2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f58319a2

Branch: refs/heads/master
Commit: f58319a24fd5e026411538b1fb7336d9d894277b
Parents: 454a00d
Author: gatorsmile <ga...@gmail.com>
Authored: Mon Mar 21 10:34:54 2016 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Mon Mar 21 10:34:54 2016 +0800

----------------------------------------------------------------------
 .../sql/catalyst/optimizer/Optimizer.scala      | 12 ++++
 .../optimizer/EliminateSortsSuite.scala         | 66 ++++++++++++++++++++
 2 files changed, 78 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f58319a2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index c419b5f..41e8dc0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -87,6 +87,7 @@ abstract class Optimizer extends RuleExecutor[LogicalPlan] {
       SimplifyConditionals,
       RemoveDispensableExpressions,
       PruneFilters,
+      EliminateSorts,
       SimplifyCasts,
       SimplifyCaseConversionExpressions,
       EliminateSerialization) ::
@@ -826,6 +827,17 @@ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper {
 }
 
 /**
+ * Removes no-op SortOrder from Sort
+ */
+object EliminateSorts  extends Rule[LogicalPlan] {
+  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+    case s @ Sort(orders, _, child) if orders.isEmpty || orders.exists(_.child.foldable) =>
+      val newOrders = orders.filterNot(_.child.foldable)
+      if (newOrders.isEmpty) child else s.copy(order = newOrders)
+  }
+}
+
+/**
  * Removes filters that can be evaluated trivially.  This can be done through the following ways:
  * 1) by eliding the filter for cases where it will always evaluate to `true`.
  * 2) by substituting a dummy empty relation when the filter will always evaluate to `false`.

http://git-wip-us.apache.org/repos/asf/spark/blob/f58319a2/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala
new file mode 100644
index 0000000..27c15e8
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.optimizer
+
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.dsl.plans._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.rules._
+
+class EliminateSortsSuite extends PlanTest {
+
+  object Optimize extends RuleExecutor[LogicalPlan] {
+    val batches =
+      Batch("Eliminate Sorts", Once,
+        EliminateSorts) :: Nil
+  }
+
+  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)
+
+  test("Empty order by clause") {
+    val x = testRelation
+
+    val query = x.orderBy()
+    val optimized = Optimize.execute(query.analyze)
+    val correctAnswer = x.analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("All the SortOrder are no-op") {
+    val x = testRelation
+
+    val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending))
+    val optimized = Optimize.execute(query.analyze)
+    val correctAnswer = x.analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+
+  test("Partial order-by clauses contain no-op SortOrder") {
+    val x = testRelation
+
+    val query = x.orderBy(SortOrder(3, Ascending), 'a.asc)
+    val optimized = Optimize.execute(query.analyze)
+    val correctAnswer = x.orderBy('a.asc).analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org