You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2022/03/12 05:39:24 UTC
[spark] branch master updated: [SPARK-38526][SQL] Fix misleading function alias name for RuntimeReplaceable
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new c91c2e9 [SPARK-38526][SQL] Fix misleading function alias name for RuntimeReplaceable
c91c2e9 is described below
commit c91c2e9afec0d5d5bbbd2e155057fe409c5bb928
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Fri Mar 11 21:38:24 2022 -0800
[SPARK-38526][SQL] Fix misleading function alias name for RuntimeReplaceable
### What changes were proposed in this pull request?
This PR uses a manual recursion to replace `RuntimeReplaceable` expressions instead of `transformAllExpressionsWithPruning`. The problem with `transformAllExpressionsWithPruning` is that it automatically makes the replacement expression inherit the function alias name from the parent node, which is quite misleading. For example, for `select date_part('month', c) from t`, the optimized plan shown by EXPLAIN before this PR is:
```
Project [date_part(cast(c#18 as date)) AS date_part(month, c)#19]
+- Relation default.t[c#18] parquet
```
After this PR, it is:
```
Project [month(cast(c#9 as date)) AS date_part(month, c)#10]
+- Relation default.t[c#9] parquet
```
### Why are the changes needed?
The changes fix the misleading function name shown in the EXPLAIN result.
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
A new test case was added to `ExplainSuite`.
Closes #35821 from cloud-fan/follow2.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala | 9 +++++++--
sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala | 9 ++++++++-
2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
index 7b896e2..ef9c4b9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
@@ -40,9 +40,14 @@ import org.apache.spark.util.Utils
* we use this to replace Every and Any with Min and Max respectively.
*/
object ReplaceExpressions extends Rule[LogicalPlan] {
- def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning(
+ def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning(
_.containsAnyPattern(RUNTIME_REPLACEABLE)) {
- case e: RuntimeReplaceable => e.replacement
+ case p => p.mapExpressions(replace)
+ }
+
+ private def replace(e: Expression): Expression = e match {
+ case r: RuntimeReplaceable => replace(r.replacement)
+ case _ => e.mapChildren(replace)
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
index 3659f20..073b67e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
@@ -106,7 +106,7 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
keywords = "InMemoryRelation", "StorageLevel(disk, memory, deserialized, 1 replicas)")
}
- test("optimized plan should show the rewritten aggregate expression") {
+ test("optimized plan should show the rewritten expression") {
withTempView("test_agg") {
sql(
"""
@@ -125,6 +125,13 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
"Aggregate [k#x], [k#x, every(v#x) AS every(v)#x, some(v#x) AS some(v)#x, " +
"any(v#x) AS any(v)#x]")
}
+
+ withTable("t") {
+ sql("CREATE TABLE t(col TIMESTAMP) USING parquet")
+ val df = sql("SELECT date_part('month', col) FROM t")
+ checkKeywordsExistsInExplain(df,
+ "Project [month(cast(col#x as date)) AS date_part(month, col)#x]")
+ }
}
test("explain inline tables cross-joins") {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org