You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/06/10 20:16:17 UTC

[spark] branch branch-3.0 updated: [SPARK-31956][SQL] Do not fail if there is no ambiguous self join

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 62fbff8  [SPARK-31956][SQL] Do not fail if there is no ambiguous self join
62fbff8 is described below

commit 62fbff8ad127f3a6dd2360f3c02a20f4391cdad4
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Wed Jun 10 13:11:24 2020 -0700

    [SPARK-31956][SQL] Do not fail if there is no ambiguous self join
    
    ### What changes were proposed in this pull request?
    
    This is a followup of https://github.com/apache/spark/pull/28695 , to fix the problem completely.
    
    The root cause is that, `df("col").as("name")` is not a column reference anymore, and should not have the special column metadata. However, this was broken in https://github.com/apache/spark/commit/ba7adc494923de8104ab37d412edd78afe540f45#diff-ac415c903887e49486ba542a65eec980L1050-L1053
    
    This PR fixes the regression by stripping the special column metadata in `Column.name`, which restores the behavior from before https://github.com/apache/spark/pull/28326.
    
    ### Why are the changes needed?
    
    Fix a regression. We shouldn't fail if there is no ambiguous self-join.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, the query in the test can run now.
    
    ### How was this patch tested?
    
    updated test
    
    Closes #28783 from cloud-fan/self-join.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
    (cherry picked from commit c40051932290db3a63f80324900a116019b1e589)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 sql/core/src/main/scala/org/apache/spark/sql/Column.scala          | 2 +-
 .../test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala   | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 2144472..e6f7b1d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -1042,7 +1042,7 @@ class Column(val expr: Expression) extends Logging {
    * @since 2.0.0
    */
   def name(alias: String): Column = withExpr {
-    Alias(expr, alias)()
+    Alias(normalizedExpr(), alias)()
   }
 
   /**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala
index fb58c98..3b3b54f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala
@@ -204,7 +204,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
     }
   }
 
-  test("SPARK-28344: don't fail as ambiguous self join when there is no join") {
+  test("SPARK-28344: don't fail if there is no ambiguous self join") {
     withSQLConf(
       SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true") {
       val df = Seq(1, 1, 2, 2).toDF("a")
@@ -212,6 +212,11 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession {
       checkAnswer(
         df.select(df("a").alias("x"), sum(df("a")).over(w)),
         Seq((1, 2), (1, 2), (2, 4), (2, 4)).map(Row.fromTuple))
+
+      val joined = df.join(spark.range(1)).select($"a")
+      checkAnswer(
+        joined.select(joined("a").alias("x"), sum(joined("a")).over(w)),
+        Seq((1, 2), (1, 2), (2, 4), (2, 4)).map(Row.fromTuple))
     }
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org