You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yu...@apache.org on 2023/03/29 06:01:42 UTC
[spark] branch master updated: Revert "[SPARK-42832][SQL] Remove repartition if it is the child of LocalLimit"
This is an automated email from the ASF dual-hosted git repository.
yumwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 58facc16536 Revert "[SPARK-42832][SQL] Remove repartition if it is the child of LocalLimit"
58facc16536 is described below
commit 58facc16536cb2784aace38d6cca77bd055a3053
Author: Yuming Wang <yu...@ebay.com>
AuthorDate: Wed Mar 29 13:51:51 2023 +0800
Revert "[SPARK-42832][SQL] Remove repartition if it is the child of LocalLimit"
This reverts commit f9c105e1b693760d8c904066fdb65630aa4aeb91.
---
.../spark/sql/catalyst/optimizer/Optimizer.scala | 9 ---------
.../catalyst/optimizer/CollapseRepartitionSuite.scala | 18 ------------------
2 files changed, 27 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index ffc7284e40a..7d1ef6c7fb3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -1213,15 +1213,6 @@ object CollapseRepartition extends Rule[LogicalPlan] {
// child.
case r @ RebalancePartitions(_, child: RebalancePartitions, _, _) =>
r.withNewChildren(child.children)
- // Case 5: When a LocalLimit has a child of Repartition we can remove the Repartition.
- // Because its output is determined by the number of partitions and the expressions of the
- // Repartition. Therefore, it is feasible to remove Repartition except for repartition by
- // nondeterministic expressions, because users may expect to randomly take data.
- case l @ LocalLimit(_, r: RepartitionByExpression)
- if r.partitionExpressions.nonEmpty && r.partitionExpressions.forall(_.deterministic) =>
- l.copy(child = r.child)
- case l @ LocalLimit(_, r: RebalancePartitions) =>
- l.copy(child = r.child)
}
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseRepartitionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseRepartitionSuite.scala
index 7c8a90fe23e..f9eb6d2e760 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseRepartitionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseRepartitionSuite.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.optimizer
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
-import org.apache.spark.sql.catalyst.expressions.Uuid
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -223,21 +222,4 @@ class CollapseRepartitionSuite extends PlanTest {
comparePlans(optimized, expected)
}
}
-
- test("SPARK-42832: Remove repartition if it is the child of LocalLimit") {
- Seq(testRelation.distribute($"a")(2),
- testRelation.rebalance(),
- testRelation.rebalance($"a")).foreach { repartition =>
- comparePlans(
- Optimize.execute(repartition.limit(3).analyze),
- testRelation.limit(3).analyze)
- }
-
- // In this case, do not remove repartition, the user may want to randomly take data.
- Seq(testRelation.distribute()(2),
- testRelation.distribute(Uuid())(2)).foreach { repartition =>
- val plan = repartition.limit(3).analyze
- comparePlans( Optimize.execute(plan), plan)
- }
- }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org