You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/11/04 16:42:49 UTC

[spark] branch branch-3.0 updated: [SPARK-33338][SQL] GROUP BY using literal map should not fail

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new e7a6211  [SPARK-33338][SQL] GROUP BY using literal map should not fail
e7a6211 is described below

commit e7a62116029149e9c601c6345934ad8bd7ff8e82
Author: Dongjoon Hyun <dh...@apple.com>
AuthorDate: Wed Nov 4 08:35:10 2020 -0800

    [SPARK-33338][SQL] GROUP BY using literal map should not fail
    
    ### What changes were proposed in this pull request?
    
    This PR aims to fix `semanticEquals` works correctly on `GetMapValue` expressions having literal maps with `ArrayBasedMapData` and `GenericArrayData`.
    
    ### Why are the changes needed?
    
    This is a regression from Apache Spark 1.6.x.
    ```scala
    scala> sc.version
    res1: String = 1.6.3
    
    scala> sqlContext.sql("SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k]").show
    +---+
    |_c0|
    +---+
    | v1|
    +---+
    ```
    
    Apache Spark 2.x ~ 3.0.1 raise`RuntimeException` for the following queries.
    ```sql
    CREATE TABLE t USING ORC AS SELECT map('k1', 'v1') m, 'k1' k
    SELECT map('k1', 'v1')[k] FROM t GROUP BY 1
    SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k]
    SELECT map('k1', 'v1')[k] a FROM t GROUP BY a
    ```
    
    **BEFORE**
    ```scala
    Caused by: java.lang.RuntimeException: Couldn't find k#3 in [keys: [k1], values: [v1][k#3]#6]
    	at scala.sys.package$.error(package.scala:27)
    	at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:85)
    	at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:79)
    	at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
    ```
    
    **AFTER**
    ```sql
    spark-sql> SELECT map('k1', 'v1')[k] FROM t GROUP BY 1;
    v1
    Time taken: 1.278 seconds, Fetched 1 row(s)
    spark-sql> SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k];
    v1
    Time taken: 0.313 seconds, Fetched 1 row(s)
    spark-sql> SELECT map('k1', 'v1')[k] a FROM t GROUP BY a;
    v1
    Time taken: 0.265 seconds, Fetched 1 row(s)
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Pass the CIs with the newly added test case.
    
    Closes #30246 from dongjoon-hyun/SPARK-33338.
    
    Authored-by: Dongjoon Hyun <dh...@apple.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
    (cherry picked from commit 42c0b175ce6ee4bf1104b6a8cef6bb6477693781)
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../apache/spark/sql/catalyst/expressions/literals.scala  |  2 ++
 .../spark/sql/catalyst/expressions/ComplexTypeSuite.scala | 15 +++++++++++++++
 .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala   | 12 ++++++++++++
 3 files changed, 29 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index 9e96ab8..413d0af 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -316,6 +316,8 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression {
       (value, o.value) match {
         case (null, null) => true
         case (a: Array[Byte], b: Array[Byte]) => util.Arrays.equals(a, b)
+        case (a: ArrayBasedMapData, b: ArrayBasedMapData) =>
+          a.keyArray == b.keyArray && a.valueArray == b.valueArray
         case (a, b) => a != null && a.equals(b)
       }
     case _ => false
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
index dbe4370..54eb07b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
@@ -22,6 +22,7 @@ import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedExtractValue}
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
+import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
@@ -466,4 +467,18 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
     CreateNamedStruct(Seq("a", "x", "b", 2.0)).genCode(ctx)
     assert(ctx.inlinedMutableStates.isEmpty)
   }
+
+  test("SPARK-33338: semanticEquals should handle static GetMapValue correctly") {
+    val keys = new Array[UTF8String](1)
+    val values = new Array[UTF8String](1)
+    keys(0) = UTF8String.fromString("key")
+    values(0) = UTF8String.fromString("value")
+
+    val d1 = new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values))
+    val d2 = new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values))
+    val m1 = GetMapValue(Literal.create(d1, MapType(StringType, StringType)), Literal("a"))
+    val m2 = GetMapValue(Literal.create(d2, MapType(StringType, StringType)), Literal("a"))
+
+    assert(m1.semanticEquals(m2))
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 8f278f9..bcd3e79 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3523,6 +3523,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
       }
     }
   }
+
+  test("SPARK-33338: GROUP BY using literal map should not fail") {
+    withTempDir { dir =>
+      sql(s"CREATE TABLE t USING ORC LOCATION '${dir.toURI}' AS SELECT map('k1', 'v1') m, 'k1' k")
+      Seq(
+        "SELECT map('k1', 'v1')[k] FROM t GROUP BY 1",
+        "SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k]",
+        "SELECT map('k1', 'v1')[k] a FROM t GROUP BY a").foreach { statement =>
+        checkAnswer(sql(statement), Row("v1"))
+      }
+    }
+  }
 }
 
 case class Foo(bar: Option[String])


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org