Posted to issues@spark.apache.org by "Marco Gaido (JIRA)" <ji...@apache.org> on 2018/06/07 15:38:00 UTC

[jira] [Commented] (SPARK-24481) GeneratedIteratorForCodegenStage1 grows beyond 64 KB

    [ https://issues.apache.org/jira/browse/SPARK-24481?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16504828#comment-16504828 ] 

Marco Gaido commented on SPARK-24481:
-------------------------------------

Thanks for reporting this. I am investigating further, but I think this is not related to the large case-when statement; rather, it is caused by SPARK-22600. Anyway, you can easily work around the issue by setting {{spark.sql.codegen.fallback=true}} in your configuration.
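
For example (a minimal sketch, assuming an interactive session where a {{SparkSession}} is already bound to the variable {{spark}}):

{code:java}
// Let Spark fall back to interpreted execution when whole-stage
// generated code fails to compile, instead of failing the query.
spark.conf.set("spark.sql.codegen.fallback", "true")

// Equivalently, the config can be passed at submit time:
//   spark-submit --conf spark.sql.codegen.fallback=true ...
{code}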

> GeneratedIteratorForCodegenStage1 grows beyond 64 KB
> ----------------------------------------------------
>
>                 Key: SPARK-24481
>                 URL: https://issues.apache.org/jira/browse/SPARK-24481
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.3.0
>         Environment: Emr 5.13.0 and Databricks Cloud 4.0
>            Reporter: Andrew Conegliano
>            Priority: Major
>         Attachments: log4j-active(1).log
>
>
> Similar to other "grows beyond 64 KB" errors. It happens with a large case statement:
> {code:java}
> import org.apache.spark.sql.functions._
> import scala.collection.mutable
> import org.apache.spark.sql.Column
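> // A single nested JSON event, parallelized into an RDD and read as a DataFrame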
> var rdd = sc.parallelize(Array("""{
> "event":
> {
> "timestamp": 1521086591110,
> "event_name": "yu",
> "page":
> {
> "page_url": "https://",
> "page_name": "es"
> },
> "properties":
> {
> "id": "87",
> "action": "action",
> "navigate_action": "navigate_action"
> }
> }
> }
> """))
> var df = spark.read.json(rdd)
> df = df.select("event.properties.id","event.timestamp","event.page.page_url","event.properties.action","event.page.page_name","event.event_name","event.properties.navigate_action")
> .toDF("id","event_time","url","action","page_name","event_name","navigation_action")
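> // Build one large CASE WHEN expression with 300 branches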
> var a = "case "
> for(i <- 1 to 300){
>   a = a + s"when action like '$i%' THEN '$i' "
> }
> a = a + " else null end as task_id"
> val expression = expr(a)
> df = df.filter("id is not null and id <> '' and event_time is not null")
> val transformationExpressions: mutable.HashMap[String, Column] = mutable.HashMap(
> "action" -> expr("coalesce(action, navigation_action) as action"),
> "task_id" -> expression
> )
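> // Apply each transformation expression as a new or replaced column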
> for ((col, expr) <- transformationExpressions)
>   df = df.withColumn(col, expr)
> df = df.filter("(action is not null and action <> '') or (page_name is not null and page_name <> '')")
> df.show
> {code}
>  
> Exception:
> {code:java}
> 18/06/07 01:06:34 ERROR CodeGenerator: failed to compile: org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "project_doConsume$(Lorg/apache/spark/sql/catalyst/expressions/GeneratedClass$GeneratedIteratorForCodegenStage1;Lorg/apache/spark/sql/catalyst/InternalRow;)V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1" grows beyond 64 KB
> org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "project_doConsume$(Lorg/apache/spark/sql/catalyst/expressions/GeneratedClass$GeneratedIteratorForCodegenStage1;Lorg/apache/spark/sql/catalyst/InternalRow;)V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1" grows beyond 64 KB
> 	at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:361)
> 	at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:234)
> 	at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:446)
> 	at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:313)
> 	at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:235)
> 	at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:204)
> 	at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80)
> 	at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1444)
> 	at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1523)
> 	at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1520)
> 	at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3522)
> 	at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2315)
> 	at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2278)
> 	at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2193)
> 	at com.google.common.cache.LocalCache.get(LocalCache.java:3932)
> 	at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:3936)
> 	at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4806)
> 	at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1392)
> 	at org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:579)
> 	at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:578)
> 	at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:135)
> 	at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
> 	at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$3.apply(SparkPlan.scala:167)
> 	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> 	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:164)
> 	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
> 	at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:61)
> 	at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:70)
> 	at org.apache.spark.sql.execution.CollectLimitExec.executeCollectResult(limit.scala:45)
> 	at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectResult(Dataset.scala:2759)
> 	at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3331)
> 	at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2488)
> 	at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2488)
> 	at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3315)
> 	at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:88)
> 	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:124)
> 	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3314)
> 	at org.apache.spark.sql.Dataset.head(Dataset.scala:2488)
> 	at org.apache.spark.sql.Dataset.take(Dataset.scala:2702)
> 	at org.apache.spark.sql.Dataset.showString(Dataset.scala:258)
> 	at org.apache.spark.sql.Dataset.show(Dataset.scala:727)
> 	at org.apache.spark.sql.Dataset.show(Dataset.scala:686)
> 	at org.apache.spark.sql.Dataset.show(Dataset.scala:695)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-687647945500165:1)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-687647945500165:51)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-687647945500165:53)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw$$iw.<init>(command-687647945500165:55)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw$$iw.<init>(command-687647945500165:57)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw$$iw.<init>(command-687647945500165:59)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw$$iw.<init>(command-687647945500165:61)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$$iw.<init>(command-687647945500165:63)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read.<init>(command-687647945500165:65)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$.<init>(command-687647945500165:69)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$read$.<clinit>(command-687647945500165)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$eval$.$print$lzycompute(<notebook>:7)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$eval$.$print(<notebook>:6)
> 	at line7b2cd01e0857498cbfa87d4dfaadb85d46.$eval.$print(<notebook>)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:498)
> 	at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786)
> 	at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047)
> 	at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638)
> 	at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637)
> 	at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
> 	at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
> 	at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637)
> 	at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569)
> 	at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565)
> 	at com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:186)
> 	at com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply$mcV$sp(ScalaDriverLocal.scala:189)
> 	at com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply(ScalaDriverLocal.scala:189)
> 	at com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply(ScalaDriverLocal.scala:189)
> 	at com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:500)
> 	at com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:456)
> 	at com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:189)
> 	at com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$3.apply(DriverLocal.scala:249)
> 	at com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$3.apply(DriverLocal.scala:229)
> 	at com.databricks.logging.UsageLogging$$anonfun$withAttributionContext$1.apply(UsageLogging.scala:188)
> 	at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
> 	at com.databricks.logging.UsageLogging$class.withAttributionContext(UsageLogging.scala:183)
> 	at com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:43)
> 	at com.databricks.logging.UsageLogging$class.withAttributionTags(UsageLogging.scala:221)
> 	at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:43)
> 	at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:229)
> 	at com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:601)
> 	at com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:601)
> 	at scala.util.Try$.apply(Try.scala:192)
> 	at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:596)
> 	at com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:486)
> 	at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:554)
> 	at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:391)
> 	at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:348)
> 	at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:215)
> 	at java.lang.Thread.run(Thread.java:748){code}
>  
> The log file is attached.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org