You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Apache Spark (JIRA)" <ji...@apache.org> on 2018/10/05 19:51:00 UTC
[jira] [Assigned] (SPARK-25654) createDataFrame does not support
nested JavaBeans inside arrays and collections
[ https://issues.apache.org/jira/browse/SPARK-25654?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Apache Spark reassigned SPARK-25654:
------------------------------------
Assignee: (was: Apache Spark)
> createDataFrame does not support nested JavaBeans inside arrays and collections
> -------------------------------------------------------------------------------
>
> Key: SPARK-25654
> URL: https://issues.apache.org/jira/browse/SPARK-25654
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.3.0
> Reporter: Michal Šenkýř
> Priority: Minor
>
> SPARK-17952 added support for nested JavaBeans to the Java DataFrame API.
> This issue tracks adding support for nested beans inside array and collection fields of JavaBeans.
> Current behavior:
> {noformat}
> scala> import scala.beans.BeanProperty
> import scala.beans.BeanProperty
> scala> class Nested(@BeanProperty var i: Int) extends Serializable
> defined class Nested
> scala> class Test(@BeanProperty var array: Array[Nested], @BeanProperty var list: java.util.List[Nested], @BeanProperty var map: java.util.Map[Integer, Nested]) extends Serializable
> defined class Test
> scala> import scala.collection.JavaConverters._
> import scala.collection.JavaConverters._
> scala> val array = Array(new Nested(1))
> array: Array[Nested] = Array(Nested@3dedc8b8)
> scala> val list = Seq(new Nested(2), new Nested(3)).asJava
> list: java.util.List[Nested] = [Nested@56adb75e, Nested@2cc7b63d]
> scala> val map = Map(Int.box(1) -> new Nested(4), Int.box(2) -> new Nested(5)).asJava
> map: java.util.Map[Integer,Nested] = {1=Nested@40bac624, 2=Nested@1bc80978}
> scala> val df = spark.createDataFrame(Seq(new Test(array, list, map)).asJava, classOf[Test])
> java.lang.IllegalArgumentException: The value (Nested@3dedc8b8) of the type (Nested) cannot be converted to struct<i:int>
> at org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:262)
> at org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:238)
> at org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:103)
> at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter$$anonfun$toCatalystImpl$1.apply(CatalystTypeConverters.scala:162)
> at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
> at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
> at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
> at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
> at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
> at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
> at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter.toCatalystImpl(CatalystTypeConverters.scala:162)
> at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter.toCatalystImpl(CatalystTypeConverters.scala:154)
> at org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:103)
> at org.apache.spark.sql.catalyst.CatalystTypeConverters$$anonfun$createToCatalystConverter$2.apply(CatalystTypeConverters.scala:396)
> at org.apache.spark.sql.SQLContext$$anonfun$createStructConverter$1$1$$anonfun$apply$1.apply(SQLContext.scala:1114)
> at org.apache.spark.sql.SQLContext$$anonfun$createStructConverter$1$1$$anonfun$apply$1.apply(SQLContext.scala:1113)
> at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
> at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
> at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
> at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
> at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
> at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
> at org.apache.spark.sql.SQLContext$$anonfun$createStructConverter$1$1.apply(SQLContext.scala:1113)
> at org.apache.spark.sql.SQLContext$$anonfun$createStructConverter$1$1.apply(SQLContext.scala:1108)
> at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
> at scala.collection.Iterator$class.toStream(Iterator.scala:1320)
> at scala.collection.AbstractIterator.toStream(Iterator.scala:1334)
> at scala.collection.TraversableOnce$class.toSeq(TraversableOnce.scala:298)
> at scala.collection.AbstractIterator.toSeq(Iterator.scala:1334)
> at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:423)
> ... 51 elided
> {noformat}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org