You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2020/12/26 07:31:25 UTC
[spark] branch master updated: [SPARK-33897][SQL] Can't set option
'cross' in join method
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2553d53 [SPARK-33897][SQL] Can't set option 'cross' in join method
2553d53 is described below
commit 2553d53dc85fdf1127446941e2bc749e721c1b57
Author: kozakana <go...@gmail.com>
AuthorDate: Sat Dec 26 16:30:50 2020 +0900
[SPARK-33897][SQL] Can't set option 'cross' in join method
### What changes were proposed in this pull request?
[The PySpark documentation](https://spark.apache.org/docs/3.0.1/api/python/pyspark.sql.html#pyspark.sql.DataFrame.join) says "Must be one of: inner, cross, outer, full, fullouter, full_outer, left, leftouter, left_outer, right, rightouter, right_outer, semi, leftsemi, left_semi, anti, leftanti and left_anti."
However, I get the following error when I set the cross option.
```
scala> val df1 = spark.createDataFrame(Seq((1,"a"),(2,"b")))
df1: org.apache.spark.sql.DataFrame = [_1: int, _2: string]
scala> val df2 = spark.createDataFrame(Seq((1,"A"),(2,"B"), (3, "C")))
df2: org.apache.spark.sql.DataFrame = [_1: int, _2: string]
scala> df1.join(right = df2, usingColumns = Seq("_1"), joinType = "cross").show()
java.lang.IllegalArgumentException: requirement failed: Unsupported using join type Cross
at scala.Predef$.require(Predef.scala:281)
at org.apache.spark.sql.catalyst.plans.UsingJoin.<init>(joinTypes.scala:106)
at org.apache.spark.sql.Dataset.join(Dataset.scala:1025)
... 53 elided
```
### Why are the changes needed?
The documentation says the cross option can be set, but when I try to set it, I get a java.lang.IllegalArgumentException.
### Does this PR introduce _any_ user-facing change?
With this fix, the behavior matches what the documentation describes.
### How was this patch tested?
There is already a test for [JoinTypes](https://github.com/apache/spark/blob/1b9fd67904671ea08526bfb7a97d694815d47665/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/JoinTypesTest.scala), but I can't find a test for the join option itself.
Closes #30803 from kozakana/allow_cross_option.
Authored-by: kozakana <go...@gmail.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
.../scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala | 2 +-
.../test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala | 10 ++++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala
index feea1d2..da3cfb4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala
@@ -102,7 +102,7 @@ case class NaturalJoin(tpe: JoinType) extends JoinType {
}
case class UsingJoin(tpe: JoinType, usingColumns: Seq[String]) extends JoinType {
- require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti).contains(tpe),
+ require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti, Cross).contains(tpe),
"Unsupported using join type " + tpe)
override def sql: String = "USING " + tpe.sql
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
index c317f56..1513c2e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@@ -122,6 +122,16 @@ class DataFrameJoinSuite extends QueryTest
df2.crossJoin(df1),
Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") ::
Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil)
+
+ checkAnswer(
+ df1.join(df2, Nil, "cross"),
+ Row(1, "1", 2, "2") :: Row(1, "1", 4, "4") ::
+ Row(3, "3", 2, "2") :: Row(3, "3", 4, "4") :: Nil)
+
+ checkAnswer(
+ df2.join(df1, Nil, "cross"),
+ Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") ::
+ Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil)
}
test("broadcast join hint using broadcast function") {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org