You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2020/03/03 10:01:51 UTC
[spark] branch branch-3.0 updated: [SPARK-30998][SQL]
ClassCastException when a generator having nested inner generators
This is an automated email from the ASF dual-hosted git repository.
yamamuro pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new ded0a72 [SPARK-30998][SQL] ClassCastException when a generator having nested inner generators
ded0a72 is described below
commit ded0a72d81c1d34753be8a156126312506fb50b1
Author: Takeshi Yamamuro <ya...@apache.org>
AuthorDate: Tue Mar 3 19:00:33 2020 +0900
[SPARK-30998][SQL] ClassCastException when a generator having nested inner generators
### What changes were proposed in this pull request?
The query below failed on the master branch:
```
scala> sql("select array(array(1, 2), array(3)) ar").select(explode(explode($"ar"))).show()
20/03/01 13:51:56 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0)
java.lang.ClassCastException: scala.collection.mutable.ArrayOps$ofRef cannot be cast to org.apache.spark.sql.catalyst.util.ArrayData
at org.apache.spark.sql.catalyst.expressions.ExplodeBase.eval(generators.scala:313)
at org.apache.spark.sql.execution.GenerateExec.$anonfun$doExecute$8(GenerateExec.scala:108)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
at scala.collection.Iterator$ConcatIterator.hasNext(Iterator.scala:222)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
...
```
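This PR modifies the `hasNestedGenerator` code in `ExtractGenerator` so that it correctly catches generators nested inside other generators (e.g. `explode(explode(...))`) and reports them as an analysis error instead of failing at runtime.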
### Why are the changes needed?
A bug fix.
### Does this PR introduce any user-facing change?
No.
### How was this patch tested?
Added tests.
Closes #27750 from maropu/HandleNestedGenerators.
Authored-by: Takeshi Yamamuro <ya...@apache.org>
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
(cherry picked from commit 313e62c376acab30e546df253b28452a664d3e73)
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
.../apache/spark/sql/catalyst/analysis/Analyzer.scala | 16 +++++++++++++---
.../sql/catalyst/analysis/AnalysisErrorSuite.scala | 19 +++++++++++++++++++
.../org/apache/spark/sql/GeneratorFunctionSuite.scala | 8 ++++++++
3 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 3d79799..486b952 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -2164,10 +2164,20 @@ class Analyzer(
}
private def hasNestedGenerator(expr: NamedExpression): Boolean = {
+ def hasInnerGenerator(g: Generator): Boolean = g match {
+ // Since `GeneratorOuter` is just a wrapper of generators, we skip it here
+ case go: GeneratorOuter =>
+ hasInnerGenerator(go.child)
+ case _ =>
+ g.children.exists { _.find {
+ case _: Generator => true
+ case _ => false
+ }.isDefined }
+ }
CleanupAliases.trimNonTopLevelAliases(expr) match {
- case UnresolvedAlias(_: Generator, _) => false
- case Alias(_: Generator, _) => false
- case MultiAlias(_: Generator, _) => false
+ case UnresolvedAlias(g: Generator, _) => hasInnerGenerator(g)
+ case Alias(g: Generator, _) => hasInnerGenerator(g)
+ case MultiAlias(g: Generator, _) => hasInnerGenerator(g)
case other => hasGenerator(other)
}
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 8f62b0b..3db1053 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -434,6 +434,25 @@ class AnalysisErrorSuite extends AnalysisTest {
)
errorTest(
+ "SPARK-30998: unsupported nested inner generators",
+ {
+ val nestedListRelation = LocalRelation(
+ AttributeReference("nestedList", ArrayType(ArrayType(IntegerType)))())
+ nestedListRelation.select(Explode(Explode($"nestedList")))
+ },
+ "Generators are not supported when it's nested in expressions, but got: " +
+ "explode(explode(nestedList))" :: Nil
+ )
+
+ errorTest(
+ "SPARK-30998: unsupported nested inner generators for aggregates",
+ testRelation.select(Explode(Explode(
+ CreateArray(CreateArray(min($"a") :: max($"a") :: Nil) :: Nil)))),
+ "Generators are not supported when it's nested in expressions, but got: " +
+ "explode(explode(array(array(min(a), max(a)))))" :: Nil
+ )
+
+ errorTest(
"generator appears in operator which is not Project",
listRelation.sortBy(Explode($"list").asc),
"Generators are not supported outside the SELECT clause, but got: Sort" :: Nil
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
index 96a0eb3..6785b31 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
@@ -343,6 +343,14 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession {
assert(msg2.contains("Only one generator allowed per aggregate clause"))
}
}
+
+ test("SPARK-30998: Unsupported nested inner generators") {
+ val errMsg = intercept[AnalysisException] {
+ sql("SELECT array(array(1, 2), array(3)) v").select(explode(explode($"v"))).collect
+ }.getMessage
+ assert(errMsg.contains("Generators are not supported when it's nested in expressions, " +
+ "but got: explode(explode(v))"))
+ }
}
case class EmptyGenerator() extends Generator {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org