Posted to issues@spark.apache.org by "Kazuaki Ishizaki (Jira)" <ji...@apache.org> on 2020/02/04 10:20:00 UTC

[jira] [Comment Edited] (SPARK-30711) 64KB JVM bytecode limit - janino.InternalCompilerException

    [ https://issues.apache.org/jira/browse/SPARK-30711?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17029724#comment-17029724 ] 

Kazuaki Ishizaki edited comment on SPARK-30711 at 2/4/20 10:19 AM:
-------------------------------------------------------------------

In my environment, both v3.0.0-preview and master branches cause the exception.


was (Author: kiszk):
In my environment, both v3.0.0-preview and master branches causes the exception.

> 64KB JVM bytecode limit - janino.InternalCompilerException
> ----------------------------------------------------------
>
>                 Key: SPARK-30711
>                 URL: https://issues.apache.org/jira/browse/SPARK-30711
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.4.4
>         Environment: Windows 10
> Spark 2.4.4
> scalaVersion 2.11.12
> JVM Oracle 1.8.0_221-b11
>            Reporter: Frederik Schreiber
>            Priority: Major
>
> Exception
> {code:java}
> ERROR CodeGenerator: failed to compile: org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "processNext()V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4" grows beyond 64 KB
> org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "processNext()V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4" grows beyond 64 KB
>   at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:382)
>   at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:237)
>   at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:465)
>   at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:313)
>   at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:235)
>   at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:207)
>   at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80)
>   at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1290)
>   at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1372)
>   at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1369)
>   at org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
>   at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
>   at org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
>   at org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
>   at org.spark_project.guava.cache.LocalCache.get(LocalCache.java:4000)
>   at org.spark_project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
>   at org.spark_project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
>   at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1238)
>   at org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:584)
>   at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:583)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
>   at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
>   at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247)
>   at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:296)
>   at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3384)
>   at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2783)
>   at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2783)
>   at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3365)
>   at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
>   at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
>   at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
>   at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3364)
>   at org.apache.spark.sql.Dataset.collect(Dataset.scala:2783)
>   at de.sparkbug.janino.SparkJaninoBug$$anonfun$1.apply(SparkJaninoBug.scala:105)
>   at de.sparkbug.janino.SparkJaninoBug$$anonfun$1.apply(SparkJaninoBug.scala:12)
>   at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
>   at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
>   at org.scalatest.Transformer.apply(Transformer.scala:22)
>   at org.scalatest.Transformer.apply(Transformer.scala:20)
>   at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186)
>   at org.scalatest.TestSuite$class.withFixture(TestSuite.scala:196)
>   at org.scalatest.FunSuite.withFixture(FunSuite.scala:1560)
>   at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:183)
>   at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
>   at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
>   at org.scalatest.SuperEngine.runTestImpl(Engine.scala:286)
>   at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:196)
>   at org.scalatest.FunSuite.runTest(FunSuite.scala:1560)
>   at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
>   at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
>   at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:393)
>   at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:381)
>   at scala.collection.immutable.List.foreach(List.scala:392)
>   at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:381)
>   at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:376)
>   at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:458)
>   at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:229)
>   at org.scalatest.FunSuite.runTests(FunSuite.scala:1560)
>   at org.scalatest.Suite$class.run(Suite.scala:1124)
>   at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560)
>   at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
>   at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
>   at org.scalatest.SuperEngine.runImpl(Engine.scala:518)
>   at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:233)
>   at org.scalatest.FunSuite.run(FunSuite.scala:1560)
>   at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
>   at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1349)
>   at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1343)
>   at scala.collection.immutable.List.foreach(List.scala:392)
>   at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1343)
>   at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1012)
>   at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1011)
>   at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1509)
>   at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1011)
>   at org.scalatest.tools.Runner$.run(Runner.scala:850)
>   at org.scalatest.tools.Runner.run(Runner.scala)
>   at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:133)
>   at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:27)
> Caused by: org.codehaus.janino.InternalCompilerException: Code of method "processNext()V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4" grows beyond 64 KB
>   at org.codehaus.janino.CodeContext.makeSpace(CodeContext.java:1009)
> {code}
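> The underlying constraint is the JVM's hard limit of 64 KB of bytecode per method: whole-stage codegen fuses the long chain of operators in the example below into a single generated processNext() method, and that method overflows the limit. As an aside not in the original report, the generated Java source for each fused stage can be inspected with Spark's debug helpers (a minimal sketch, assuming Spark 2.4 and the df built in the example below):
> {code:java}
> // Not part of the report: prints each whole-stage-codegen subtree together
> // with the Java source Spark generated for it, which shows the oversized
> // processNext() body of GeneratedIteratorForCodegenStage4.
> import org.apache.spark.sql.execution.debug._
> df.debugCodegen()
> {code}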
> Example code:
> {code:java}
> package de.sparkbug.janino
> import java.sql.Date
> import org.apache.spark.sql.functions._
> import org.apache.spark.sql.types._
> import org.apache.spark.sql.{Row, SparkSession}
> import org.scalatest.FunSuite
> class SparkJaninoBug extends FunSuite {
>   test("test janino compile bug") {
>     val spark: SparkSession = SparkSession
>       .builder()
>       .appName("Test Spark App")
>       .master("local[*]")
>       .getOrCreate()
>     import spark.implicits._
>     val br_schema = StructType(Seq(
>       StructField("S_ID", IntegerType, nullable = true),
>       StructField("D_ID", LongType, nullable = true),
>       StructField("TYPE", StringType, nullable = true),
>       StructField("RT", StringType, nullable = true),
>       StructField("BR_ID", StringType, nullable = true),
>       StructField("I_ID", DateType, nullable = true),
>       StructField("SG_ID", IntegerType, nullable = true),
>       StructField("S_ID_MAIN", IntegerType, nullable = true),
>       StructField("PT_ID", IntegerType, nullable = true),
>       StructField("C_ID", IntegerType, nullable = true),
>       StructField("CF_ID", IntegerType, nullable = true),
>       StructField("P_ID", IntegerType, nullable = true),
>       StructField("RT_ID", IntegerType, nullable = true),
>       StructField("BT_ID", IntegerType, nullable = true),
>       StructField("I_T", StringType, nullable = true),
>       StructField("A", DoubleType, nullable = true),
>       StructField("T_A", DoubleType, nullable = true),
>       StructField("B_S_DT", DateType, nullable = true),
>       StructField("B_E_DT", DateType, nullable = true),
>       StructField("B_M_DT", DateType, nullable = true),
>       StructField("BR_P_A", DoubleType, nullable = true),
>       StructField("BR_D_A", IntegerType, nullable = true),
>       StructField("BR_B_D", IntegerType, nullable = true),
>       StructField("BR_B_A", DoubleType, nullable = true)
>     ))
>     val b_data = Seq(Row(111, 2804711765L, "D", "recano", "1017888", Date.valueOf("2019-11-20"),1,111,1,1,1,2,1,1,"",0.0,0.0,Date.valueOf("2019-10-01"),Date.valueOf("2019-10-31"),Date.valueOf("2019-10-31"),0.0,30,30,0.0))
>     val df_b = spark.createDataFrame(spark.sparkContext.parallelize(b_data), br_schema)
>     val schema = StructType(Seq(
>       StructField( "D_ID", LongType, nullable = true),
>       StructField( "RT_ID", IntegerType, nullable = true),
>       StructField( "P_ID", IntegerType, nullable = true),
>       StructField( "BT_ID", IntegerType, nullable = true),
>       StructField( "CF_ID", IntegerType, nullable = true),
>       StructField( "B_ID", IntegerType, nullable = true),
>       StructField( "S_ID", IntegerType, nullable = true),
>       StructField( "SG_ID", IntegerType, nullable = true),
>       StructField( "PT_ID", IntegerType, nullable = true),
>       StructField( "C_ID", IntegerType, nullable = true),
>       StructField( "N_C_DT", DateType, nullable = true),
>       StructField( "N_T_DT", DateType, nullable = true),
>       StructField( "A_B_D", IntegerType, nullable = true),
>       StructField( "B_P_E", DateType, nullable = true),
>       StructField( "B_P_S", DateType, nullable = true),
>       StructField( "A_P_A", DoubleType, nullable = true),
>       StructField( "A_B_1_D_A", DoubleType, nullable = true),
>       StructField( "A_C", IntegerType, nullable = true),
>       StructField( "A_D_A", DoubleType, nullable = true)
>     ))
>     val a_data = Seq(Row(2804711813L,1,2,1,1,1,111,1,1,1,null,null,30,Date.valueOf("2019-10-31"),Date.valueOf("2019-10-01"),0.0,0.0,1,-1.0))
>     val df_a = spark.createDataFrame(spark.sparkContext.parallelize(a_data), schema)
>     val df = df_b
>       .join(df_a, List("D_ID", "RT_ID", "P_ID", "BT_ID", "CF_ID", "S_ID", "SG_ID", "PT_ID", "C_ID"), "outer")
>       .withColumn("T", lit(null))
>       .withColumn("B_C", lit("2"))
>       .withColumn("A_B_DT", to_date(concat(year(df_b("I_ID")), month(df_b("I_ID")), lpad($"B_C",2,"0")), "yyyyMMdd"))
>       .withColumn("B_B_DT", date_sub($"I_ID", 6))
>       .withColumn("B_P_E", when($"N_T_DT" > $"A_B_DT"
>         or $"N_C_DT" > $"A_B_DT", last_day($"B_P_E")).otherwise($"B_P_E"))
>       .withColumn("A_D_A", when($"RT_ID".isInCollection(Seq(5, 6)), $"A_D_A").otherwise(datediff($"B_P_E", $"B_P_S") + 1))
>       .withColumn("A_I_C", when($"N_C_DT".isNotNull and $"B_P_S" === $"B_P_E", lit(true)).otherwise(lit(false)))
>       .withColumn("A_D_A", when($"A_I_C", lit(0)).otherwise($"A_D_A"))
>       .withColumn("A_D_A", when($"A_D_A" > 30, 30).otherwise($"A_D_A"))
>       .withColumn("A_P_A", round($"A_B_1_D_A" * $"A_D_A", 7))
>       .withColumn("M_T_I",
>         when($"A_I_C", 5)
>           .when($"T".isNotNull, 6)
>           .when($"A_P_A".isNotNull and $"BR_P_A".isNotNull,
>             when(abs($"A_P_A" - $"BR_P_A") < 0.001, 1).otherwise(2))
>           .when($"A_P_A".isNotNull and $"BR_P_A".isNull, 3)
>           .when($"A_P_A".isNull and $"BR_P_A".isNotNull, 4)
>           .otherwise(lit(99)))
>       .withColumn("D_A", when($"M_T_I" === 2, round($"BR_P_A" - $"A_P_A", 7)).otherwise(lit(null)))
>       .withColumn("D_D", when($"M_T_I" === 2, round($"BR_D_A" - $"A_D_A", 7)).otherwise(lit(null)))
>       .withColumn("RT_ID", when($"RT_ID".isNull, lit(99)).otherwise($"RT_ID"))
>     df.collect()
>   }
> }
> {code}
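> Not part of the original report, but a commonly suggested mitigation for this class of failure is to relax or bypass whole-stage codegen so Spark executes the stage on the interpreted path instead of compiling one huge method. A minimal sketch, assuming the standard Spark 2.x SQL configs:
> {code:java}
> // Not from the report: lower the bytecode-size threshold above which Spark
> // abandons whole-stage codegen for a stage (8000 bytes matches HotSpot's
> // "huge method" JIT limit).
> spark.conf.set("spark.sql.codegen.hugeMethodLimit", "8000")
> // Or switch whole-stage codegen off entirely for the session.
> spark.conf.set("spark.sql.codegen.wholeStage", "false")
> df.collect()
> {code}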



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org