You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Herman van Hovell (JIRA)" <ji...@apache.org> on 2018/10/29 15:51:00 UTC

[jira] [Resolved] (SPARK-25767) Error reported in Spark logs when using the org.apache.spark:spark-sql_2.11:2.3.2 Java library

     [ https://issues.apache.org/jira/browse/SPARK-25767?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Herman van Hovell resolved SPARK-25767.
---------------------------------------
       Resolution: Fixed
    Fix Version/s: 3.0.0
                   2.4.1

> Error reported in Spark logs when using the org.apache.spark:spark-sql_2.11:2.3.2 Java library
> ----------------------------------------------------------------------------------------------
>
>                 Key: SPARK-25767
>                 URL: https://issues.apache.org/jira/browse/SPARK-25767
>             Project: Spark
>          Issue Type: Bug
>          Components: Java API
>    Affects Versions: 2.2.0, 2.3.2
>            Reporter: Thomas Brugiere
>            Assignee: Peter Toth
>            Priority: Major
>             Fix For: 2.4.1, 3.0.0
>
>         Attachments: fileA.csv, fileB.csv, fileC.csv
>
>
> Hi,
> Here is a bug I found using the latest version of spark-sql_2.11:2.2.0. Note that this case was also tested with spark-sql_2.11:2.3.2 and the bug is also present.
> This issue is a duplicate of SPARK-25582, which I had to close after another developer accidentally linked it to the wrong PR.
> Attached are three small sample CSV files with the minimal content needed to reproduce the bug.
> The reproducer code is below:
> {code:java}
> import org.apache.spark.SparkConf;
> import org.apache.spark.sql.Dataset;
> import org.apache.spark.sql.Row;
> import org.apache.spark.sql.SparkSession;
> import scala.collection.JavaConverters;
> import scala.collection.Seq;
> import java.util.Arrays;
> public class SparkBug {
>     private static <T> Seq<T> arrayToSeq(T[] input) {
>         return JavaConverters.asScalaIteratorConverter(Arrays.asList(input).iterator()).asScala().toSeq();
>     }
>     public static void main(String[] args) throws Exception {
>         SparkConf conf = new SparkConf().setAppName("SparkBug").setMaster("local");
>         SparkSession sparkSession = SparkSession.builder().config(conf).getOrCreate();
>         Dataset<Row> df_a = sparkSession.read().option("header", true).csv("local/fileA.csv").dropDuplicates();
>         Dataset<Row> df_b = sparkSession.read().option("header", true).csv("local/fileB.csv").dropDuplicates();
>         Dataset<Row> df_c = sparkSession.read().option("header", true).csv("local/fileC.csv").dropDuplicates();
>         String[] key_join_1 = new String[]{"colA", "colB", "colC", "colD", "colE", "colF"};
>         String[] key_join_2 = new String[]{"colA", "colB", "colC", "colD", "colE"};
>         Dataset<Row> df_inventory_1 = df_a.join(df_b, arrayToSeq(key_join_1), "left");
>         Dataset<Row> df_inventory_2 = df_inventory_1.join(df_c, arrayToSeq(key_join_2), "left");
>         df_inventory_2.show();
>     }
> }
> {code}
> When running this code, I can see the exception below:
> {code:java}
> 18/10/18 09:25:49 ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 202, Column 18: Expression "agg_isNull_28" is not an rvalue
> org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 202, Column 18: Expression "agg_isNull_28" is not an rvalue
>     at org.codehaus.janino.UnitCompiler.compileError(UnitCompiler.java:11821)
>     at org.codehaus.janino.UnitCompiler.toRvalueOrCompileException(UnitCompiler.java:7170)
>     at org.codehaus.janino.UnitCompiler.getConstantValue2(UnitCompiler.java:5332)
>     at org.codehaus.janino.UnitCompiler.access$9400(UnitCompiler.java:212)
>     at org.codehaus.janino.UnitCompiler$13$1.visitAmbiguousName(UnitCompiler.java:5287)
>     at org.codehaus.janino.Java$AmbiguousName.accept(Java.java:4053)
>     at org.codehaus.janino.UnitCompiler$13.visitLvalue(UnitCompiler.java:5284)
>     at org.codehaus.janino.Java$Lvalue.accept(Java.java:3977)
>     at org.codehaus.janino.UnitCompiler.getConstantValue(UnitCompiler.java:5280)
>     at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2391)
>     at org.codehaus.janino.UnitCompiler.access$1900(UnitCompiler.java:212)
>     at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1474)
>     at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1466)
>     at org.codehaus.janino.Java$IfStatement.accept(Java.java:2926)
>     at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1466)
>     at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1546)
>     at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3075)
>     at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1336)
>     at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1309)
>     at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:799)
>     at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:958)
>     at org.codehaus.janino.UnitCompiler.access$700(UnitCompiler.java:212)
>     at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:393)
>     at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:385)
>     at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1286)
>     at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:385)
>     at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1285)
>     at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:825)
>     at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:411)
>     at org.codehaus.janino.UnitCompiler.access$400(UnitCompiler.java:212)
>     at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:390)
>     at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:385)
>     at org.codehaus.janino.Java$PackageMemberClassDeclaration.accept(Java.java:1405)
>     at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:385)
>     at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:357)
>     at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:234)
>     at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:446)
>     at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:313)
>     at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:235)
>     at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:204)
>     at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80)
>     at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1417)
>     at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1493)
>     at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1490)
>     at org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
>     at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
>     at org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
>     at org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
>     at org.spark_project.guava.cache.LocalCache.get(LocalCache.java:4000)
>     at org.spark_project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
>     at org.spark_project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
>     at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1365)
>     at org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:579)
>     at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:578)
>     at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
>     at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
>     at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
>     at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>     at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
>     at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
>     at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247)
>     at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:337)
>     at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
>     at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3278)
>     at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2489)
>     at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2489)
>     at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3259)
>     at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
>     at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3258)
>     at org.apache.spark.sql.Dataset.head(Dataset.scala:2489)
>     at org.apache.spark.sql.Dataset.take(Dataset.scala:2703)
>     at org.apache.spark.sql.Dataset.showString(Dataset.scala:254)
>     at org.apache.spark.sql.Dataset.show(Dataset.scala:723)
>     at org.apache.spark.sql.Dataset.show(Dataset.scala:682)
>     at org.apache.spark.sql.Dataset.show(Dataset.scala:691)
>     at SparkBug.main(SparkBug.java:30)
> {code}
>  



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org