You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/11/04 16:42:49 UTC
[spark] branch branch-3.0 updated: [SPARK-33338][SQL] GROUP BY
using literal map should not fail
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new e7a6211 [SPARK-33338][SQL] GROUP BY using literal map should not fail
e7a6211 is described below
commit e7a62116029149e9c601c6345934ad8bd7ff8e82
Author: Dongjoon Hyun <dh...@apple.com>
AuthorDate: Wed Nov 4 08:35:10 2020 -0800
[SPARK-33338][SQL] GROUP BY using literal map should not fail
### What changes were proposed in this pull request?
This PR aims to make `semanticEquals` work correctly on `GetMapValue` expressions having literal maps with `ArrayBasedMapData` and `GenericArrayData`.
### Why are the changes needed?
This is a regression from Apache Spark 1.6.x.
```scala
scala> sc.version
res1: String = 1.6.3
scala> sqlContext.sql("SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k]").show
+---+
|_c0|
+---+
| v1|
+---+
```
Apache Spark 2.x ~ 3.0.1 raise `RuntimeException` for the following queries.
```sql
CREATE TABLE t USING ORC AS SELECT map('k1', 'v1') m, 'k1' k
SELECT map('k1', 'v1')[k] FROM t GROUP BY 1
SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k]
SELECT map('k1', 'v1')[k] a FROM t GROUP BY a
```
**BEFORE**
```scala
Caused by: java.lang.RuntimeException: Couldn't find k#3 in [keys: [k1], values: [v1][k#3]#6]
at scala.sys.package$.error(package.scala:27)
at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:85)
at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:79)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
```
**AFTER**
```sql
spark-sql> SELECT map('k1', 'v1')[k] FROM t GROUP BY 1;
v1
Time taken: 1.278 seconds, Fetched 1 row(s)
spark-sql> SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k];
v1
Time taken: 0.313 seconds, Fetched 1 row(s)
spark-sql> SELECT map('k1', 'v1')[k] a FROM t GROUP BY a;
v1
Time taken: 0.265 seconds, Fetched 1 row(s)
```
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Pass the CIs with the newly added test cases.
Closes #30246 from dongjoon-hyun/SPARK-33338.
Authored-by: Dongjoon Hyun <dh...@apple.com>
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
(cherry picked from commit 42c0b175ce6ee4bf1104b6a8cef6bb6477693781)
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
.../apache/spark/sql/catalyst/expressions/literals.scala | 2 ++
.../spark/sql/catalyst/expressions/ComplexTypeSuite.scala | 15 +++++++++++++++
.../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 12 ++++++++++++
3 files changed, 29 insertions(+)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index 9e96ab8..413d0af 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -316,6 +316,8 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression {
(value, o.value) match {
case (null, null) => true
case (a: Array[Byte], b: Array[Byte]) => util.Arrays.equals(a, b)
+ case (a: ArrayBasedMapData, b: ArrayBasedMapData) =>
+ a.keyArray == b.keyArray && a.valueArray == b.valueArray
case (a, b) => a != null && a.equals(b)
}
case _ => false
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
index dbe4370..54eb07b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
@@ -22,6 +22,7 @@ import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedExtractValue}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
+import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
@@ -466,4 +467,18 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
CreateNamedStruct(Seq("a", "x", "b", 2.0)).genCode(ctx)
assert(ctx.inlinedMutableStates.isEmpty)
}
+
+ test("SPARK-33338: semanticEquals should handle static GetMapValue correctly") {
+ val keys = new Array[UTF8String](1)
+ val values = new Array[UTF8String](1)
+ keys(0) = UTF8String.fromString("key")
+ values(0) = UTF8String.fromString("value")
+
+ val d1 = new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values))
+ val d2 = new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values))
+ val m1 = GetMapValue(Literal.create(d1, MapType(StringType, StringType)), Literal("a"))
+ val m2 = GetMapValue(Literal.create(d2, MapType(StringType, StringType)), Literal("a"))
+
+ assert(m1.semanticEquals(m2))
+ }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 8f278f9..bcd3e79 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3523,6 +3523,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
}
}
}
+
+ test("SPARK-33338: GROUP BY using literal map should not fail") {
+ withTempDir { dir =>
+ sql(s"CREATE TABLE t USING ORC LOCATION '${dir.toURI}' AS SELECT map('k1', 'v1') m, 'k1' k")
+ Seq(
+ "SELECT map('k1', 'v1')[k] FROM t GROUP BY 1",
+ "SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k]",
+ "SELECT map('k1', 'v1')[k] a FROM t GROUP BY a").foreach { statement =>
+ checkAnswer(sql(statement), Row("v1"))
+ }
+ }
+ }
}
case class Foo(bar: Option[String])
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org