You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2020/03/03 14:49:03 UTC

[spark] branch branch-2.4 updated: [SPARK-30998][SQL][2.4] ClassCastException when a generator having nested inner generators

This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new f4c8c48  [SPARK-30998][SQL][2.4] ClassCastException when a generator having nested inner generators
f4c8c48 is described below

commit f4c8c4892197b8c5425a8013a09e9b379444e6fc
Author: Takeshi Yamamuro <ya...@apache.org>
AuthorDate: Tue Mar 3 23:47:40 2020 +0900

    [SPARK-30998][SQL][2.4] ClassCastException when a generator having nested inner generators
    
    ### What changes were proposed in this pull request?
    
    The query below fails in branch-2.4 with a `ClassCastException`:
    
    ```
    scala> sql("select array(array(1, 2), array(3)) ar").select(explode(explode($"ar"))).show()
    20/03/01 13:51:56 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0)
    java.lang.ClassCastException: scala.collection.mutable.ArrayOps$ofRef cannot be cast to org.apache.spark.sql.catalyst.util.ArrayData
    	at org.apache.spark.sql.catalyst.expressions.ExplodeBase.eval(generators.scala:313)
    	at org.apache.spark.sql.execution.GenerateExec.$anonfun$doExecute$8(GenerateExec.scala:108)
    	at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
    	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
    	at scala.collection.Iterator$ConcatIterator.hasNext(Iterator.scala:222)
    	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
        ...
    ```
    
    This PR modifies the `hasNestedGenerator` code in `ExtractGenerator` so that it correctly catches nested inner generators.
    
    This backport PR comes from https://github.com/apache/spark/pull/27750
    
    ### Why are the changes needed?
    
    A bug fix.
    
    ### Does this PR introduce any user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Added tests.
    
    Closes #27769 from maropu/SPARK-20998-BRANCH-2.4.
    
    Authored-by: Takeshi Yamamuro <ya...@apache.org>
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
 .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 16 +++++++++++++---
 .../sql/catalyst/analysis/AnalysisErrorSuite.scala    | 19 +++++++++++++++++++
 .../org/apache/spark/sql/GeneratorFunctionSuite.scala |  8 ++++++++
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 0fedf7f..61f77be 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1681,10 +1681,20 @@ class Analyzer(
     }
 
     private def hasNestedGenerator(expr: NamedExpression): Boolean = {
+      def hasInnerGenerator(g: Generator): Boolean = g match {
+        // Since `GeneratorOuter` is just a wrapper of generators, we skip it here
+        case go: GeneratorOuter =>
+          hasInnerGenerator(go.child)
+        case _ =>
+          g.children.exists { _.find {
+            case _: Generator => true
+            case _ => false
+          }.isDefined }
+      }
       CleanupAliases.trimNonTopLevelAliases(expr) match {
-        case UnresolvedAlias(_: Generator, _) => false
-        case Alias(_: Generator, _) => false
-        case MultiAlias(_: Generator, _) => false
+        case UnresolvedAlias(g: Generator, _) => hasInnerGenerator(g)
+        case Alias(g: Generator, _) => hasInnerGenerator(g)
+        case MultiAlias(g: Generator, _) => hasInnerGenerator(g)
         case other => hasGenerator(other)
       }
     }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 45319aa..337902f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -395,6 +395,25 @@ class AnalysisErrorSuite extends AnalysisTest {
   )
 
   errorTest(
+    "SPARK-30998: unsupported nested inner generators",
+    {
+      val nestedListRelation = LocalRelation(
+        AttributeReference("nestedList", ArrayType(ArrayType(IntegerType)))())
+      nestedListRelation.select(Explode(Explode($"nestedList")))
+    },
+    "Generators are not supported when it's nested in expressions, but got: " +
+      "explode(explode(nestedList))" :: Nil
+  )
+
+  errorTest(
+    "SPARK-30998: unsupported nested inner generators for aggregates",
+    testRelation.select(Explode(Explode(
+      CreateArray(CreateArray(min($"a") :: max($"a") :: Nil) :: Nil)))),
+    "Generators are not supported when it's nested in expressions, but got: " +
+      "explode(explode(array(array(min(a), max(a)))))" :: Nil
+  )
+
+  errorTest(
     "generator appears in operator which is not Project",
     listRelation.sortBy(Explode('list).asc),
     "Generators are not supported outside the SELECT clause, but got: Sort" :: Nil
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
index 8280a3c..df66b49 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
@@ -308,6 +308,14 @@ class GeneratorFunctionSuite extends QueryTest with SharedSQLContext {
       sql("select * from values 1, 2 lateral view outer empty_gen() a as b"),
       Row(1, null) :: Row(2, null) :: Nil)
   }
+
+  test("SPARK-30998: Unsupported nested inner generators") {
+    val errMsg = intercept[AnalysisException] {
+      sql("SELECT array(array(1, 2), array(3)) v").select(explode(explode($"v"))).collect
+    }.getMessage
+    assert(errMsg.contains("Generators are not supported when it's nested in expressions, " +
+      "but got: explode(explode(v))"))
+  }
 }
 
 case class EmptyGenerator() extends Generator {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org