You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2018/07/03 04:20:19 UTC
spark git commit: [SPARK-24385][SQL] Resolve self-join condition ambiguity for EqualNullSafe
Repository: spark
Updated Branches:
refs/heads/master 85fe1297e -> a7c8f0c8c
[SPARK-24385][SQL] Resolve self-join condition ambiguity for EqualNullSafe
## What changes were proposed in this pull request?
In `Dataset.join` we have a small hack that resolves column-name ambiguity in the join condition of self-joins. The current code handles only `EqualTo` conditions.
This PR extends the fix to `EqualNullSafe` conditions.
Credit for this PR should be given to daniel-shields.
## How was this patch tested?
Added a unit test to `DataFrameJoinSuite`.
Author: Marco Gaido <ma...@gmail.com>
Closes #21605 from mgaido91/SPARK-24385_2.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a7c8f0c8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a7c8f0c8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a7c8f0c8
Branch: refs/heads/master
Commit: a7c8f0c8cb144a026ea21e8780107e363ceacb8d
Parents: 85fe129
Author: Marco Gaido <ma...@gmail.com>
Authored: Tue Jul 3 12:20:03 2018 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Tue Jul 3 12:20:03 2018 +0800
----------------------------------------------------------------------
sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 5 +++++
.../test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala | 8 ++++++++
2 files changed, 13 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/a7c8f0c8/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 2ec236f..c97246f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1016,6 +1016,11 @@ class Dataset[T] private[sql](
catalyst.expressions.EqualTo(
withPlan(plan.left).resolve(a.name),
withPlan(plan.right).resolve(b.name))
+ case catalyst.expressions.EqualNullSafe(a: AttributeReference, b: AttributeReference)
+ if a.sameRef(b) =>
+ catalyst.expressions.EqualNullSafe(
+ withPlan(plan.left).resolve(a.name),
+ withPlan(plan.right).resolve(b.name))
}}
withPlan {
http://git-wip-us.apache.org/repos/asf/spark/blob/a7c8f0c8/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
index 0d9eeab..10d9a11 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@@ -287,4 +287,12 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
dfOne.join(dfTwo, $"a" === $"b", "left").queryExecution.optimizedPlan
}
}
+
+ test("SPARK-24385: Resolve ambiguity in self-joins with EqualNullSafe") {
+ withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "false") {
+ val df = spark.range(2)
+ // this throws an exception before the fix
+ df.join(df, df("id") <=> df("id")).queryExecution.optimizedPlan
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org