Posted to commits@spark.apache.org by gu...@apache.org on 2020/12/26 07:31:25 UTC

[spark] branch master updated: [SPARK-33897][SQL] Can't set option 'cross' in join method

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 2553d53  [SPARK-33897][SQL] Can't set option 'cross' in join method
2553d53 is described below

commit 2553d53dc85fdf1127446941e2bc749e721c1b57
Author: kozakana <go...@gmail.com>
AuthorDate: Sat Dec 26 16:30:50 2020 +0900

    [SPARK-33897][SQL] Can't set option 'cross' in join method
    
    ### What changes were proposed in this pull request?
    
    [The PySpark documentation](https://spark.apache.org/docs/3.0.1/api/python/pyspark.sql.html#pyspark.sql.DataFrame.join) says "Must be one of: inner, cross, outer, full, fullouter, full_outer, left, leftouter, left_outer, right, rightouter, right_outer, semi, leftsemi, left_semi, anti, leftanti and left_anti."
    However, I get the following error when I set the `cross` option:
    
    ```
    scala> val df1 = spark.createDataFrame(Seq((1,"a"),(2,"b")))
    df1: org.apache.spark.sql.DataFrame = [_1: int, _2: string]
    
    scala> val df2 = spark.createDataFrame(Seq((1,"A"),(2,"B"), (3, "C")))
    df2: org.apache.spark.sql.DataFrame = [_1: int, _2: string]
    
    scala> df1.join(right = df2, usingColumns = Seq("_1"), joinType = "cross").show()
    java.lang.IllegalArgumentException: requirement failed: Unsupported using join type Cross
      at scala.Predef$.require(Predef.scala:281)
      at org.apache.spark.sql.catalyst.plans.UsingJoin.<init>(joinTypes.scala:106)
      at org.apache.spark.sql.Dataset.join(Dataset.scala:1025)
      ... 53 elided
    ```
    
    ### Why are the changes needed?
    
    The documentation says the `cross` option can be set, but when I try to set it, I get a java.lang.IllegalArgumentException.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. With this fix, the behavior matches the documentation: the `cross` join type is accepted by `join`.
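    
    For illustration, the same call from the repro above is expected to succeed once `Cross` is an allowed `UsingJoin` type (a sketch, not part of the patch):
    
    ```
    scala> val df1 = spark.createDataFrame(Seq((1,"a"),(2,"b")))
    scala> val df2 = spark.createDataFrame(Seq((1,"A"),(2,"B"), (3, "C")))
    
    scala> // Cross is now in the allowed UsingJoin types, so this call no longer throws
    scala> df1.join(right = df2, usingColumns = Seq("_1"), joinType = "cross").show()
    ```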
    
    ### How was this patch tested?
    
    There is already a test for [JoinTypes](https://github.com/apache/spark/blob/1b9fd67904671ea08526bfb7a97d694815d47665/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/JoinTypesTest.scala), but I could not find a test for the join type option itself, so a test is added to DataFrameJoinSuite.
    
    Closes #30803 from kozakana/allow_cross_option.
    
    Authored-by: kozakana <go...@gmail.com>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala  |  2 +-
 .../test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala   | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala
index feea1d2..da3cfb4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala
@@ -102,7 +102,7 @@ case class NaturalJoin(tpe: JoinType) extends JoinType {
 }
 
 case class UsingJoin(tpe: JoinType, usingColumns: Seq[String]) extends JoinType {
-  require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti).contains(tpe),
+  require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti, Cross).contains(tpe),
     "Unsupported using join type " + tpe)
   override def sql: String = "USING " + tpe.sql
 }
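
The widened `require` above is the whole fix; as a rough sketch of what it now permits (assuming spark-catalyst on the classpath), a `UsingJoin` can be built with the `Cross` type directly:

```
import org.apache.spark.sql.catalyst.plans.{Cross, UsingJoin}

// With Cross added to the allowed list, this construction passes the require check
// instead of failing with "requirement failed: Unsupported using join type Cross".
val usingCross = UsingJoin(Cross, Seq("_1"))
println(usingCross.sql)  // prints "USING CROSS" ("USING " + Cross.sql)
```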
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
index c317f56..1513c2e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@@ -122,6 +122,16 @@ class DataFrameJoinSuite extends QueryTest
       df2.crossJoin(df1),
       Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") ::
         Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil)
+
+    checkAnswer(
+      df1.join(df2, Nil, "cross"),
+      Row(1, "1", 2, "2") :: Row(1, "1", 4, "4") ::
+        Row(3, "3", 2, "2") :: Row(3, "3", 4, "4") :: Nil)
+
+    checkAnswer(
+      df2.join(df1, Nil, "cross"),
+      Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") ::
+        Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil)
   }
 
   test("broadcast join hint using broadcast function") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org