You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2022/03/12 05:39:24 UTC

[spark] branch master updated: [SPARK-38526][SQL] Fix misleading function alias name for RuntimeReplaceable

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new c91c2e9  [SPARK-38526][SQL] Fix misleading function alias name for RuntimeReplaceable
c91c2e9 is described below

commit c91c2e9afec0d5d5bbbd2e155057fe409c5bb928
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Fri Mar 11 21:38:24 2022 -0800

    [SPARK-38526][SQL] Fix misleading function alias name for RuntimeReplaceable
    
    ### What changes were proposed in this pull request?
    
    This PR uses a manual recursion to replace `RuntimeReplaceable` expressions instead of `transformAllExpressionsWithPruning`. The problem of `transformAllExpressionsWithPruning` is it will automatically make the replacement expression inherit  the function alias name from the parent node, which is quite misleading. For example, `select date_part('month', c) from t`, the optimized plan in EXPLAIN before this PR is
    ```
    Project [date_part(cast(c#18 as date)) AS date_part(month, c)#19]
    +- Relation default.t[c#18] parquet
    ```
    Now it's
    ```
    Project [month(cast(c#9 as date)) AS date_part(month, c)#10]
    +- Relation default.t[c#9] parquet
    ```
    
    ### Why are the changes needed?
    
    fix misleading EXPLAIN result
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    new test
    
    Closes #35821 from cloud-fan/follow2.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala | 9 +++++++--
 sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala  | 9 ++++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
index 7b896e2..ef9c4b9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
@@ -40,9 +40,14 @@ import org.apache.spark.util.Utils
  *   we use this to replace Every and Any with Min and Max respectively.
  */
 object ReplaceExpressions extends Rule[LogicalPlan] {
-  def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning(
+  def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning(
     _.containsAnyPattern(RUNTIME_REPLACEABLE)) {
-    case e: RuntimeReplaceable => e.replacement
+    case p => p.mapExpressions(replace)
+  }
+
+  private def replace(e: Expression): Expression = e match {
+    case r: RuntimeReplaceable => replace(r.replacement)
+    case _ => e.mapChildren(replace)
   }
 }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
index 3659f20..073b67e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
@@ -106,7 +106,7 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
       keywords = "InMemoryRelation", "StorageLevel(disk, memory, deserialized, 1 replicas)")
   }
 
-  test("optimized plan should show the rewritten aggregate expression") {
+  test("optimized plan should show the rewritten expression") {
     withTempView("test_agg") {
       sql(
         """
@@ -125,6 +125,13 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
         "Aggregate [k#x], [k#x, every(v#x) AS every(v)#x, some(v#x) AS some(v)#x, " +
           "any(v#x) AS any(v)#x]")
     }
+
+    withTable("t") {
+      sql("CREATE TABLE t(col TIMESTAMP) USING parquet")
+      val df = sql("SELECT date_part('month', col) FROM t")
+      checkKeywordsExistsInExplain(df,
+        "Project [month(cast(col#x as date)) AS date_part(month, col)#x]")
+    }
   }
 
   test("explain inline tables cross-joins") {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org