You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Sean Owen (JIRA)" <ji...@apache.org> on 2016/01/02 15:24:39 UTC
[jira] [Updated] (SPARK-12478) Dataset fields of product types
can't be null
[ https://issues.apache.org/jira/browse/SPARK-12478?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sean Owen updated SPARK-12478:
------------------------------
Target Version/s: 1.6.1, 2.0.0 (was: 2.0.0)
> Dataset fields of product types can't be null
> ---------------------------------------------
>
> Key: SPARK-12478
> URL: https://issues.apache.org/jira/browse/SPARK-12478
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 1.6.0, 2.0.0
> Reporter: Cheng Lian
> Assignee: Apache Spark
> Labels: backport-needed
>
> Spark shell snippet for reproduction:
> {code}
> import sqlContext.implicits._
> case class Inner(f: Int)
> case class Outer(i: Inner)
> Seq(Outer(null)).toDS().toDF().show()
> Seq(Outer(null)).toDS().show()
> {code}
> Expected output should be:
> {noformat}
> +----+
> | i|
> +----+
> |null|
> +----+
> +----+
> | i|
> +----+
> |null|
> +----+
> {noformat}
> Actual output:
> {noformat}
> +------+
> | i|
> +------+
> |[null]|
> +------+
> java.lang.RuntimeException: Error while decoding: java.lang.RuntimeException: Null value appeared in non-nullable field Inner.f of type scala.Int. If the schema is inferred from a Scala tuple/case class, or a Java bean, please try to use scala.Option[_] or other nullable types (e.g. java.lang.Integer instead of int/scala.Int).
> newinstance(class $iwC$$iwC$Outer,if (isnull(input[0, StructType(StructField(f,IntegerType,false))])) null else newinstance(class $iwC$$iwC$Inner,assertnotnull(input[0, StructType(StructField(f,IntegerType,false))].f,Inner,f,scala.Int),false,ObjectType(class $iwC$$iwC$Inner),Some($iwC$$iwC@6616b9e0)),false,ObjectType(class $iwC$$iwC$Outer),Some($iwC$$iwC@6ab35ce3))
> +- if (isnull(input[0, StructType(StructField(f,IntegerType,false))])) null else newinstance(class $iwC$$iwC$Inner,assertnotnull(input[0, StructType(StructField(f,IntegerType,false))].f,Inner,f,scala.Int),false,ObjectType(class $iwC$$iwC$Inner),Some($iwC$$iwC@6616b9e0))
> :- isnull(input[0, StructType(StructField(f,IntegerType,false))])
> : +- input[0, StructType(StructField(f,IntegerType,false))]
> :- null
> +- newinstance(class $iwC$$iwC$Inner,assertnotnull(input[0, StructType(StructField(f,IntegerType,false))].f,Inner,f,scala.Int),false,ObjectType(class $iwC$$iwC$Inner),Some($iwC$$iwC@6616b9e0))
> +- assertnotnull(input[0, StructType(StructField(f,IntegerType,false))].f,Inner,f,scala.Int)
> +- input[0, StructType(StructField(f,IntegerType,false))].f
> +- input[0, StructType(StructField(f,IntegerType,false))]
> at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.fromRow(ExpressionEncoder.scala:224)
> at org.apache.spark.sql.Dataset$$anonfun$collect$2.apply(Dataset.scala:704)
> at org.apache.spark.sql.Dataset$$anonfun$collect$2.apply(Dataset.scala:704)
> at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
> at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
> at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
> at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
> at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
> at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:108)
> at org.apache.spark.sql.Dataset.collect(Dataset.scala:704)
> at org.apache.spark.sql.Dataset.take(Dataset.scala:725)
> at org.apache.spark.sql.Dataset.showString(Dataset.scala:240)
> at org.apache.spark.sql.Dataset.show(Dataset.scala:230)
> at org.apache.spark.sql.Dataset.show(Dataset.scala:193)
> at org.apache.spark.sql.Dataset.show(Dataset.scala:201)
> at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)
> at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:38)
> at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40)
> at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42)
> at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:44)
> at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:46)
> at $iwC$$iwC$$iwC$$iwC.<init>(<console>:48)
> at $iwC$$iwC$$iwC.<init>(<console>:50)
> at $iwC$$iwC.<init>(<console>:52)
> at $iwC.<init>(<console>:54)
> at <init>(<console>:56)
> at .<init>(<console>:60)
> at .<clinit>(<console>)
> at .<init>(<console>:7)
> at .<clinit>(<console>)
> at $print(<console>)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:483)
> at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1045)
> at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1326)
> at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:821)
> at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:852)
> at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:800)
> at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
> at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
> at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
> at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
> at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
> at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
> at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
> at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
> at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
> at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
> at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
> at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1064)
> at org.apache.spark.repl.Main$.main(Main.scala:31)
> at org.apache.spark.repl.Main.main(Main.scala)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:483)
> at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
> at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
> at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.lang.RuntimeException: Null value appeared in non-nullable field Inner.f of type scala.Int. If the schema is inferred from a Scala tuple/case class, or a Java bean, please try to use scala.Option[_] or other nullable types (e.g. java.lang.Integer instead of int/scala.Int).
> at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
> at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.fromRow(ExpressionEncoder.scala:221)
> ... 62 more
> {noformat}
> We can see that there's an unexpected extra nested row ({{[null]}} instead of {{null}}) in the first output, which causes the exception shown above.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org