You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/09/15 22:36:55 UTC

spark git commit: [SPARK-10475] [SQL] improve column prunning for Project on Sort

Repository: spark
Updated Branches:
  refs/heads/master 841972e22 -> 31a229aa7


[SPARK-10475] [SQL] improve column prunning for Project on Sort

Sometimes we can't push down the whole `Project` though `Sort`, but we still have a chance to push down part of it.

Author: Wenchen Fan <cl...@outlook.com>

Closes #8644 from cloud-fan/column-prune.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/31a229aa
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/31a229aa
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/31a229aa

Branch: refs/heads/master
Commit: 31a229aa739b6d05ec6d91b820fcca79b6b7d6fe
Parents: 841972e
Author: Wenchen Fan <cl...@outlook.com>
Authored: Tue Sep 15 13:36:52 2015 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Tue Sep 15 13:36:52 2015 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/optimizer/Optimizer.scala | 19 +++++++++++++++----
 .../catalyst/optimizer/ColumnPruningSuite.scala  | 11 +++++++++++
 2 files changed, 26 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/31a229aa/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 0f4caec..648a65e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -228,10 +228,21 @@ object ColumnPruning extends Rule[LogicalPlan] {
     case Project(projectList, Limit(exp, child)) =>
       Limit(exp, Project(projectList, child))
 
-    // Push down project if possible when the child is sort
-    case p @ Project(projectList, s @ Sort(_, _, grandChild))
-      if s.references.subsetOf(p.outputSet) =>
-      s.copy(child = Project(projectList, grandChild))
+    // Push down project if possible when the child is sort.
+    case p @ Project(projectList, s @ Sort(_, _, grandChild)) =>
+      if (s.references.subsetOf(p.outputSet)) {
+        s.copy(child = Project(projectList, grandChild))
+      } else {
+        val neededReferences = s.references ++ p.references
+        if (neededReferences == grandChild.outputSet) {
+          // No column we can prune, return the original plan.
+          p
+        } else {
+          // Do not use neededReferences.toSeq directly, should respect grandChild's output order.
+          val newProjectList = grandChild.output.filter(neededReferences.contains)
+          p.copy(child = s.copy(child = Project(newProjectList, grandChild)))
+        }
+      }
 
     // Eliminate no-op Projects
     case Project(projectList, child) if child.output == projectList => child

http://git-wip-us.apache.org/repos/asf/spark/blob/31a229aa/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
index dbebcb8..4a1e7ce 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
@@ -80,5 +80,16 @@ class ColumnPruningSuite extends PlanTest {
     comparePlans(optimized, correctAnswer)
   }
 
+  test("Column pruning for Project on Sort") {
+    val input = LocalRelation('a.int, 'b.string, 'c.double)
+
+    val query = input.orderBy('b.asc).select('a).analyze
+    val optimized = Optimize.execute(query)
+
+    val correctAnswer = input.select('a, 'b).orderBy('b.asc).select('a).analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+
   // todo: add more tests for column pruning
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org