You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2018/07/03 04:20:19 UTC
spark git commit: [SPARK-24385][SQL] Resolve self-join condition ambiguity for EqualNullSafe

Repository: spark
Updated Branches:
  refs/heads/master 85fe1297e -> a7c8f0c8c


[SPARK-24385][SQL] Resolve self-join condition ambiguity for EqualNullSafe

## What changes were proposed in this pull request?

In Dataset.join we have a small hack for resolving ambiguity in the column name for self-joins. The current code supports only `EqualTo`.

The PR extends the fix to `EqualNullSafe`.

Credit for this PR should be given to daniel-shields.

## How was this patch tested?

added UT

Author: Marco Gaido <ma...@gmail.com>

Closes #21605 from mgaido91/SPARK-24385_2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a7c8f0c8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a7c8f0c8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a7c8f0c8

Branch: refs/heads/master
Commit: a7c8f0c8cb144a026ea21e8780107e363ceacb8d
Parents: 85fe129
Author: Marco Gaido <ma...@gmail.com>
Authored: Tue Jul 3 12:20:03 2018 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Tue Jul 3 12:20:03 2018 +0800

----------------------------------------------------------------------
 sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala   | 5 +++++
 .../test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala | 8 ++++++++
 2 files changed, 13 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a7c8f0c8/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 2ec236f..c97246f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1016,6 +1016,11 @@ class Dataset[T] private[sql](
         catalyst.expressions.EqualTo(
           withPlan(plan.left).resolve(a.name),
           withPlan(plan.right).resolve(b.name))
+      case catalyst.expressions.EqualNullSafe(a: AttributeReference, b: AttributeReference)
+        if a.sameRef(b) =>
+        catalyst.expressions.EqualNullSafe(
+          withPlan(plan.left).resolve(a.name),
+          withPlan(plan.right).resolve(b.name))
     }}
 
     withPlan {

http://git-wip-us.apache.org/repos/asf/spark/blob/a7c8f0c8/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
index 0d9eeab..10d9a11 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@@ -287,4 +287,12 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
       dfOne.join(dfTwo, $"a" === $"b", "left").queryExecution.optimizedPlan
     }
   }
+
+  test("SPARK-24385: Resolve ambiguity in self-joins with EqualNullSafe") {
+    withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "false") {
+      val df = spark.range(2)
+      // this throws an exception before the fix
+      df.join(df, df("id") <=> df("id")).queryExecution.optimizedPlan
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org