You are viewing a plain text version of this content. The link to the canonical version was not preserved in this plain-text copy.
Posted to commits@spark.apache.org by yu...@apache.org on 2023/03/29 06:01:42 UTC

[spark] branch master updated: Revert "[SPARK-42832][SQL] Remove repartition if it is the child of LocalLimit"

This is an automated email from the ASF dual-hosted git repository.

yumwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 58facc16536 Revert "[SPARK-42832][SQL] Remove repartition if it is the child of LocalLimit"
58facc16536 is described below

commit 58facc16536cb2784aace38d6cca77bd055a3053
Author: Yuming Wang <yu...@ebay.com>
AuthorDate: Wed Mar 29 13:51:51 2023 +0800

    Revert "[SPARK-42832][SQL] Remove repartition if it is the child of LocalLimit"
    
    This reverts commit f9c105e1b693760d8c904066fdb65630aa4aeb91.
---
 .../spark/sql/catalyst/optimizer/Optimizer.scala       |  9 ---------
 .../catalyst/optimizer/CollapseRepartitionSuite.scala  | 18 ------------------
 2 files changed, 27 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index ffc7284e40a..7d1ef6c7fb3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -1213,15 +1213,6 @@ object CollapseRepartition extends Rule[LogicalPlan] {
     // child.
     case r @ RebalancePartitions(_, child: RebalancePartitions, _, _) =>
       r.withNewChildren(child.children)
-    // Case 5: When a LocalLimit has a child of Repartition we can remove the Repartition.
-    // Because its output is determined by the number of partitions and the expressions of the
-    // Repartition. Therefore, it is feasible to remove Repartition except for repartition by
-    // nondeterministic expressions, because users may expect to randomly take data.
-    case l @ LocalLimit(_, r: RepartitionByExpression)
-        if r.partitionExpressions.nonEmpty && r.partitionExpressions.forall(_.deterministic) =>
-      l.copy(child = r.child)
-    case l @ LocalLimit(_, r: RebalancePartitions) =>
-      l.copy(child = r.child)
   }
 }
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseRepartitionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseRepartitionSuite.scala
index 7c8a90fe23e..f9eb6d2e760 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseRepartitionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseRepartitionSuite.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.optimizer
 
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
-import org.apache.spark.sql.catalyst.expressions.Uuid
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -223,21 +222,4 @@ class CollapseRepartitionSuite extends PlanTest {
       comparePlans(optimized, expected)
     }
   }
-
-  test("SPARK-42832: Remove repartition if it is the child of LocalLimit") {
-    Seq(testRelation.distribute($"a")(2),
-      testRelation.rebalance(),
-      testRelation.rebalance($"a")).foreach { repartition =>
-      comparePlans(
-        Optimize.execute(repartition.limit(3).analyze),
-        testRelation.limit(3).analyze)
-    }
-
-    // In this case, do not remove repartition, the user may want to randomly take data.
-    Seq(testRelation.distribute()(2),
-      testRelation.distribute(Uuid())(2)).foreach { repartition =>
-      val plan = repartition.limit(3).analyze
-      comparePlans( Optimize.execute(plan), plan)
-    }
-  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org