You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yu...@apache.org on 2022/06/24 03:16:48 UTC
[spark] branch master updated: [SPARK-39449][SQL] Propagate empty relation through Window
This is an automated email from the ASF dual-hosted git repository.
yumwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a077701d4cc [SPARK-39449][SQL] Propagate empty relation through Window
a077701d4cc is described below
commit a077701d4cc36a9a6ce898ddd3b4e5fd506f6162
Author: Yuming Wang <yu...@ebay.com>
AuthorDate: Fri Jun 24 11:16:35 2022 +0800
[SPARK-39449][SQL] Propagate empty relation through Window
### What changes were proposed in this pull request?
This PR adds support for propagating an empty relation through `Window` when its child is empty. For example:
```sql
SELECT id,
Count(*)
OVER (
ORDER BY id)
FROM (SELECT /*+ REPARTITION(3) */ *
FROM Range(100)
WHERE id < 0) t
```
After this PR:
```
== Physical Plan ==
AdaptiveSparkPlan (10)
+- == Final Plan ==
LocalTableScan (1)
+- == Initial Plan ==
CollectLimit (9)
+- Project (8)
+- Window (7)
+- Sort (6)
+- Exchange (5)
+- Exchange (4)
+- Filter (3)
+- Range (2)
```
### Why are the changes needed?
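Improve query performance. A `Window` over an empty child produces no rows, so the whole subtree can be replaced by an empty relation, which avoids the exchange, sort, and window evaluation shown in the initial plan above.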
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added a unit test to `PropagateEmptyRelationSuite`.
Closes #36848 from wangyum/SPARK-39449.
Authored-by: Yuming Wang <yu...@ebay.com>
Signed-off-by: Yuming Wang <yu...@ebay.com>
---
.../sql/catalyst/optimizer/PropagateEmptyRelation.scala | 1 +
.../catalyst/optimizer/PropagateEmptyRelationSuite.scala | 14 +++++++++++++-
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala
index 894fd0d7042..18c344f10f6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala
@@ -158,6 +158,7 @@ abstract class PropagateEmptyRelationBase extends Rule[LogicalPlan] with CastSup
// Generators like Hive-style UDTF may return their records within `close`.
case Generate(_: Explode, _, _, _, _, _) => empty(p)
case Expand(_, _, _) => empty(p)
+ case _: Window => empty(p)
case _ => p
}
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala
index 40b95f38a14..e39bf0fffb9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala
@@ -21,7 +21,7 @@ import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
-import org.apache.spark.sql.catalyst.expressions.Literal
+import org.apache.spark.sql.catalyst.expressions.{Literal, UnspecifiedFrame}
import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical.{Expand, LocalRelation, LogicalPlan, Project}
@@ -315,4 +315,16 @@ class PropagateEmptyRelationSuite extends PlanTest {
val optimized2 = Optimize.execute(plan2)
comparePlans(optimized2, expected)
}
+
+ test("SPARK-39449: Propagate empty relation through Window") {
+ val relation = LocalRelation.fromExternalRows(Seq($"a".int, $"b".int), Nil)
+
+ val originalQuery = relation.select($"a", $"b",
+ windowExpr(count($"b"), windowSpec($"a" :: Nil, $"b".asc :: Nil, UnspecifiedFrame))
+ .as("window"))
+
+ val expected = LocalRelation
+ .fromExternalRows(Seq($"a".int, $"b".int, $"window".long.withNullability(false)), Nil)
+ comparePlans(Optimize.execute(originalQuery.analyze), expected.analyze)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org