You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/01/29 16:56:47 UTC

spark git commit: [SPARK-23199][SQL] improved Removes repetition from group expressions in Aggregate

Repository: spark
Updated Branches:
  refs/heads/master badf0d0e0 -> 54dd7cf4e


[SPARK-23199][SQL] improved Removes repetition from group expressions in Aggregate

## What changes were proposed in this pull request?

Currently, all Aggregate operations will go into RemoveRepetitionFromGroupExpressions, but there is no group expression or there is no duplicate group expression in group expression, we not need copy for logic plan.

## How was this patch tested?

the existed test case.

Author: caoxuewen <ca...@zte.com.cn>

Closes #20375 from heary-cao/RepetitionGroupExpressions.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54dd7cf4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54dd7cf4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54dd7cf4

Branch: refs/heads/master
Commit: 54dd7cf4ef921bc9dc12f99cfb90d1da57939901
Parents: badf0d0
Author: caoxuewen <ca...@zte.com.cn>
Authored: Mon Jan 29 08:56:42 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Mon Jan 29 08:56:42 2018 -0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala  | 8 ++++++--
 .../sql/catalyst/optimizer/AggregateOptimizeSuite.scala      | 5 ++---
 2 files changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/54dd7cf4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 8d20770..a28b6a0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -1302,8 +1302,12 @@ object RemoveLiteralFromGroupExpressions extends Rule[LogicalPlan] {
  */
 object RemoveRepetitionFromGroupExpressions extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
-    case a @ Aggregate(grouping, _, _) =>
+    case a @ Aggregate(grouping, _, _) if grouping.size > 1 =>
       val newGrouping = ExpressionSet(grouping).toSeq
-      a.copy(groupingExpressions = newGrouping)
+      if (newGrouping.size == grouping.size) {
+        a
+      } else {
+        a.copy(groupingExpressions = newGrouping)
+      }
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/54dd7cf4/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala
index a3184a4..f8ddc93 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala
@@ -67,10 +67,9 @@ class AggregateOptimizeSuite extends PlanTest {
   }
 
   test("remove repetition in grouping expression") {
-    val input = LocalRelation('a.int, 'b.int, 'c.int)
-    val query = input.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c))
+    val query = testRelation.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c))
     val optimized = Optimize.execute(analyzer.execute(query))
-    val correctAnswer = input.groupBy('a + 1, 'b + 2)(sum('c)).analyze
+    val correctAnswer = testRelation.groupBy('a + 1, 'b + 2)(sum('c)).analyze
 
     comparePlans(optimized, correctAnswer)
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org