You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2020/03/03 10:01:51 UTC

[spark] branch branch-3.0 updated: [SPARK-30998][SQL] ClassCastException when a generator having nested inner generators

This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new ded0a72  [SPARK-30998][SQL] ClassCastException when a generator having nested inner generators
ded0a72 is described below

commit ded0a72d81c1d34753be8a156126312506fb50b1
Author: Takeshi Yamamuro <ya...@apache.org>
AuthorDate: Tue Mar 3 19:00:33 2020 +0900

    [SPARK-30998][SQL] ClassCastException when a generator having nested inner generators
    
    ### What changes were proposed in this pull request?
    
    The query below failed on the master branch:
    
    ```
    scala> sql("select array(array(1, 2), array(3)) ar").select(explode(explode($"ar"))).show()
    20/03/01 13:51:56 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0)
    java.lang.ClassCastException: scala.collection.mutable.ArrayOps$ofRef cannot be cast to org.apache.spark.sql.catalyst.util.ArrayData
    	at org.apache.spark.sql.catalyst.expressions.ExplodeBase.eval(generators.scala:313)
    	at org.apache.spark.sql.execution.GenerateExec.$anonfun$doExecute$8(GenerateExec.scala:108)
    	at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
    	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
    	at scala.collection.Iterator$ConcatIterator.hasNext(Iterator.scala:222)
    	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
        ...
    ```
    
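    This PR modifies `hasNestedGenerator` in `ExtractGenerator` so that it correctly detects a generator nested inside another generator (e.g. `explode(explode(...))`). Such queries now fail during analysis with an `AnalysisException` ("Generators are not supported when it's nested in expressions") instead of hitting a `ClassCastException` at execution time.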
    
    ### Why are the changes needed?
    
    A bug fix.
    
    ### Does this PR introduce any user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Added tests.
    
    Closes #27750 from maropu/HandleNestedGenerators.
    
    Authored-by: Takeshi Yamamuro <ya...@apache.org>
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
    (cherry picked from commit 313e62c376acab30e546df253b28452a664d3e73)
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
 .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 16 +++++++++++++---
 .../sql/catalyst/analysis/AnalysisErrorSuite.scala    | 19 +++++++++++++++++++
 .../org/apache/spark/sql/GeneratorFunctionSuite.scala |  8 ++++++++
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 3d79799..486b952 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -2164,10 +2164,20 @@ class Analyzer(
     }
 
     private def hasNestedGenerator(expr: NamedExpression): Boolean = {
+      def hasInnerGenerator(g: Generator): Boolean = g match {
+        // Since `GeneratorOuter` is just a wrapper of generators, we skip it here
+        case go: GeneratorOuter =>
+          hasInnerGenerator(go.child)
+        case _ =>
+          g.children.exists { _.find {
+            case _: Generator => true
+            case _ => false
+          }.isDefined }
+      }
       CleanupAliases.trimNonTopLevelAliases(expr) match {
-        case UnresolvedAlias(_: Generator, _) => false
-        case Alias(_: Generator, _) => false
-        case MultiAlias(_: Generator, _) => false
+        case UnresolvedAlias(g: Generator, _) => hasInnerGenerator(g)
+        case Alias(g: Generator, _) => hasInnerGenerator(g)
+        case MultiAlias(g: Generator, _) => hasInnerGenerator(g)
         case other => hasGenerator(other)
       }
     }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 8f62b0b..3db1053 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -434,6 +434,25 @@ class AnalysisErrorSuite extends AnalysisTest {
   )
 
   errorTest(
+    "SPARK-30998: unsupported nested inner generators",
+    {
+      val nestedListRelation = LocalRelation(
+        AttributeReference("nestedList", ArrayType(ArrayType(IntegerType)))())
+      nestedListRelation.select(Explode(Explode($"nestedList")))
+    },
+    "Generators are not supported when it's nested in expressions, but got: " +
+      "explode(explode(nestedList))" :: Nil
+  )
+
+  errorTest(
+    "SPARK-30998: unsupported nested inner generators for aggregates",
+    testRelation.select(Explode(Explode(
+      CreateArray(CreateArray(min($"a") :: max($"a") :: Nil) :: Nil)))),
+    "Generators are not supported when it's nested in expressions, but got: " +
+      "explode(explode(array(array(min(a), max(a)))))" :: Nil
+  )
+
+  errorTest(
     "generator appears in operator which is not Project",
     listRelation.sortBy(Explode($"list").asc),
     "Generators are not supported outside the SELECT clause, but got: Sort" :: Nil
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
index 96a0eb3..6785b31 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
@@ -343,6 +343,14 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession {
       assert(msg2.contains("Only one generator allowed per aggregate clause"))
     }
   }
+
+  test("SPARK-30998: Unsupported nested inner generators") {
+    val errMsg = intercept[AnalysisException] {
+      sql("SELECT array(array(1, 2), array(3)) v").select(explode(explode($"v"))).collect
+    }.getMessage
+    assert(errMsg.contains("Generators are not supported when it's nested in expressions, " +
+      "but got: explode(explode(v))"))
+  }
 }
 
 case class EmptyGenerator() extends Generator {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org