You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2019/03/13 22:01:23 UTC
[spark] branch master updated: [SPARK-27123][SQL][FOLLOWUP] Use
isRenaming check for limit too.
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 250946f [SPARK-27123][SQL][FOLLOWUP] Use isRenaming check for limit too.
250946f is described below
commit 250946ff93c31936b9aa5f3a47f81faac2835f4f
Author: Dongjoon Hyun <dh...@apple.com>
AuthorDate: Wed Mar 13 15:01:01 2019 -0700
[SPARK-27123][SQL][FOLLOWUP] Use isRenaming check for limit too.
## What changes were proposed in this pull request?
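This is a follow-up to https://github.com/apache/spark/pull/24049 that, based on the review comments, narrows the scope of the limit patterns in `CollapseProject` by guarding them with the same `isRenaming` check used for the other patterns.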
## How was this patch tested?
Pass the existing test.
Closes #24082 from dongjoon-hyun/SPARK-27123-2.
Authored-by: Dongjoon Hyun <dh...@apple.com>
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
.../spark/sql/catalyst/optimizer/Optimizer.scala | 29 ++++++++++------------
1 file changed, 13 insertions(+), 16 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 1b7ff02..4babd40 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -680,8 +680,12 @@ object ColumnPruning extends Rule[LogicalPlan] {
}
/**
- * Combines two adjacent [[Project]] operators into one and perform alias substitution,
- * merging the expressions into one single expression.
+ * Combines two [[Project]] operators into one and performs alias substitution,
+ * merging the expressions into one single expression for the following cases.
+ * 1. When two [[Project]] operators are adjacent.
+ * 2. When two [[Project]] operators have LocalLimit/Sample/Repartition operator between them
+ * and the upper project has the same number of columns, each equal to or an alias of a lower one.
+ * `GlobalLimit(LocalLimit)` pattern is also considered.
*/
object CollapseProject extends Rule[LogicalPlan] {
@@ -699,20 +703,13 @@ object CollapseProject extends Rule[LogicalPlan] {
agg.copy(aggregateExpressions = buildCleanedProjectList(
p.projectList, agg.aggregateExpressions))
}
- case p1 @ Project(_, g @ GlobalLimit(_, l @ LocalLimit(_, p2: Project))) =>
- if (haveCommonNonDeterministicOutput(p1.projectList, p2.projectList)) {
- p1
- } else {
- val newProjectList = buildCleanedProjectList(p1.projectList, p2.projectList)
- g.copy(child = l.copy(child = p2.copy(projectList = newProjectList)))
- }
- case p1 @ Project(_, l @ LocalLimit(_, p2: Project)) =>
- if (haveCommonNonDeterministicOutput(p1.projectList, p2.projectList)) {
- p1
- } else {
- val newProjectList = buildCleanedProjectList(p1.projectList, p2.projectList)
- l.copy(child = p2.copy(projectList = newProjectList))
- }
+ case Project(l1, g @ GlobalLimit(_, limit @ LocalLimit(_, p2 @ Project(l2, _))))
+ if isRenaming(l1, l2) =>
+ val newProjectList = buildCleanedProjectList(l1, l2)
+ g.copy(child = limit.copy(child = p2.copy(projectList = newProjectList)))
+ case Project(l1, limit @ LocalLimit(_, p2 @ Project(l2, _))) if isRenaming(l1, l2) =>
+ val newProjectList = buildCleanedProjectList(l1, l2)
+ limit.copy(child = p2.copy(projectList = newProjectList))
case Project(l1, r @ Repartition(_, _, p @ Project(l2, _))) if isRenaming(l1, l2) =>
r.copy(child = p.copy(projectList = buildCleanedProjectList(l1, p.projectList)))
case Project(l1, s @ Sample(_, _, _, _, p2 @ Project(l2, _))) if isRenaming(l1, l2) =>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org