You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Michal Šenkýř (JIRA)" <ji...@apache.org> on 2018/10/05 19:17:00 UTC
[jira] [Created] (SPARK-25654) createDataFrame does not support nested JavaBeans inside arrays and collections
Michal Šenkýř created SPARK-25654:
-------------------------------------
Summary: createDataFrame does not support nested JavaBeans inside arrays and collections
Key: SPARK-25654
URL: https://issues.apache.org/jira/browse/SPARK-25654
Project: Spark
Issue Type: Bug
Components: SQL
Affects Versions: 2.3.0
Reporter: Michal Šenkýř
SPARK-17952 added support for nested JavaBeans to the Java DataFrame API.
This issue tracks support for nested beans inside array, collection, and map fields of JavaBeans, as exercised by the example below.
Current behavior:
{noformat}
scala> import scala.beans.BeanProperty
import scala.beans.BeanProperty
scala> class Nested(@BeanProperty var i: Int) extends Serializable
defined class Nested
scala> class Test(@BeanProperty var array: Array[Nested], @BeanProperty var list: java.util.List[Nested], @BeanProperty var map: java.util.Map[Integer, Nested]) extends Serializable
defined class Test
scala> import scala.collection.JavaConverters._
import scala.collection.JavaConverters._
scala> val array = Array(new Nested(1))
array: Array[Nested] = Array(Nested@3dedc8b8)
scala> val list = Seq(new Nested(2), new Nested(3)).asJava
list: java.util.List[Nested] = [Nested@56adb75e, Nested@2cc7b63d]
scala> val map = Map(Int.box(1) -> new Nested(4), Int.box(2) -> new Nested(5)).asJava
map: java.util.Map[Integer,Nested] = {1=Nested@40bac624, 2=Nested@1bc80978}
scala> val df = spark.createDataFrame(Seq(new Test(array, list, map)).asJava, classOf[Test])
java.lang.IllegalArgumentException: The value (Nested@3dedc8b8) of the type (Nested) cannot be converted to struct<i:int>
at org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:262)
at org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:238)
at org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:103)
at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter$$anonfun$toCatalystImpl$1.apply(CatalystTypeConverters.scala:162)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter.toCatalystImpl(CatalystTypeConverters.scala:162)
at org.apache.spark.sql.catalyst.CatalystTypeConverters$ArrayConverter.toCatalystImpl(CatalystTypeConverters.scala:154)
at org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:103)
at org.apache.spark.sql.catalyst.CatalystTypeConverters$$anonfun$createToCatalystConverter$2.apply(CatalystTypeConverters.scala:396)
at org.apache.spark.sql.SQLContext$$anonfun$createStructConverter$1$1$$anonfun$apply$1.apply(SQLContext.scala:1114)
at org.apache.spark.sql.SQLContext$$anonfun$createStructConverter$1$1$$anonfun$apply$1.apply(SQLContext.scala:1113)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
at org.apache.spark.sql.SQLContext$$anonfun$createStructConverter$1$1.apply(SQLContext.scala:1113)
at org.apache.spark.sql.SQLContext$$anonfun$createStructConverter$1$1.apply(SQLContext.scala:1108)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$class.toStream(Iterator.scala:1320)
at scala.collection.AbstractIterator.toStream(Iterator.scala:1334)
at scala.collection.TraversableOnce$class.toSeq(TraversableOnce.scala:298)
at scala.collection.AbstractIterator.toSeq(Iterator.scala:1334)
at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:423)
... 51 elided
{noformat}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org