You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2020/03/03 14:49:03 UTC
[spark] branch branch-2.4 updated: [SPARK-30998][SQL][2.4]
ClassCastException when a generator having nested inner generators
This is an automated email from the ASF dual-hosted git repository.
yamamuro pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new f4c8c48 [SPARK-30998][SQL][2.4] ClassCastException when a generator having nested inner generators
f4c8c48 is described below
commit f4c8c4892197b8c5425a8013a09e9b379444e6fc
Author: Takeshi Yamamuro <ya...@apache.org>
AuthorDate: Tue Mar 3 23:47:40 2020 +0900
[SPARK-30998][SQL][2.4] ClassCastException when a generator having nested inner generators
### What changes were proposed in this pull request?
The query below fails in branch-2.4:
```
scala> sql("select array(array(1, 2), array(3)) ar").select(explode(explode($"ar"))).show()
20/03/01 13:51:56 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0)
java.lang.ClassCastException: scala.collection.mutable.ArrayOps$ofRef cannot be cast to org.apache.spark.sql.catalyst.util.ArrayData
at org.apache.spark.sql.catalyst.expressions.ExplodeBase.eval(generators.scala:313)
at org.apache.spark.sql.execution.GenerateExec.$anonfun$doExecute$8(GenerateExec.scala:108)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
at scala.collection.Iterator$ConcatIterator.hasNext(Iterator.scala:222)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
...
```
This PR modifies the `hasNestedGenerator` code in `ExtractGenerator` so that it correctly catches nested inner generators and reports an analysis error instead of failing at runtime.
This PR is a backport of https://github.com/apache/spark/pull/27750
### Why are the changes needed?
A bug fix.
### Does this PR introduce any user-facing change?
No.
### How was this patch tested?
Added tests.
Closes #27769 from maropu/SPARK-20998-BRANCH-2.4.
Authored-by: Takeshi Yamamuro <ya...@apache.org>
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
.../apache/spark/sql/catalyst/analysis/Analyzer.scala | 16 +++++++++++++---
.../sql/catalyst/analysis/AnalysisErrorSuite.scala | 19 +++++++++++++++++++
.../org/apache/spark/sql/GeneratorFunctionSuite.scala | 8 ++++++++
3 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 0fedf7f..61f77be 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1681,10 +1681,20 @@ class Analyzer(
}
private def hasNestedGenerator(expr: NamedExpression): Boolean = {
+ def hasInnerGenerator(g: Generator): Boolean = g match {
+ // Since `GeneratorOuter` is just a wrapper of generators, we skip it here
+ case go: GeneratorOuter =>
+ hasInnerGenerator(go.child)
+ case _ =>
+ g.children.exists { _.find {
+ case _: Generator => true
+ case _ => false
+ }.isDefined }
+ }
CleanupAliases.trimNonTopLevelAliases(expr) match {
- case UnresolvedAlias(_: Generator, _) => false
- case Alias(_: Generator, _) => false
- case MultiAlias(_: Generator, _) => false
+ case UnresolvedAlias(g: Generator, _) => hasInnerGenerator(g)
+ case Alias(g: Generator, _) => hasInnerGenerator(g)
+ case MultiAlias(g: Generator, _) => hasInnerGenerator(g)
case other => hasGenerator(other)
}
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 45319aa..337902f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -395,6 +395,25 @@ class AnalysisErrorSuite extends AnalysisTest {
)
errorTest(
+ "SPARK-30998: unsupported nested inner generators",
+ {
+ val nestedListRelation = LocalRelation(
+ AttributeReference("nestedList", ArrayType(ArrayType(IntegerType)))())
+ nestedListRelation.select(Explode(Explode($"nestedList")))
+ },
+ "Generators are not supported when it's nested in expressions, but got: " +
+ "explode(explode(nestedList))" :: Nil
+ )
+
+ errorTest(
+ "SPARK-30998: unsupported nested inner generators for aggregates",
+ testRelation.select(Explode(Explode(
+ CreateArray(CreateArray(min($"a") :: max($"a") :: Nil) :: Nil)))),
+ "Generators are not supported when it's nested in expressions, but got: " +
+ "explode(explode(array(array(min(a), max(a)))))" :: Nil
+ )
+
+ errorTest(
"generator appears in operator which is not Project",
listRelation.sortBy(Explode('list).asc),
"Generators are not supported outside the SELECT clause, but got: Sort" :: Nil
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
index 8280a3c..df66b49 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
@@ -308,6 +308,14 @@ class GeneratorFunctionSuite extends QueryTest with SharedSQLContext {
sql("select * from values 1, 2 lateral view outer empty_gen() a as b"),
Row(1, null) :: Row(2, null) :: Nil)
}
+
+ test("SPARK-30998: Unsupported nested inner generators") {
+ val errMsg = intercept[AnalysisException] {
+ sql("SELECT array(array(1, 2), array(3)) v").select(explode(explode($"v"))).collect
+ }.getMessage
+ assert(errMsg.contains("Generators are not supported when it's nested in expressions, " +
+ "but got: explode(explode(v))"))
+ }
}
case class EmptyGenerator() extends Generator {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org