You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Michael Armbrust (JIRA)" <ji...@apache.org> on 2016/12/07 20:57:59 UTC

[jira] [Resolved] (SPARK-16902) Custom ExpressionEncoder for primitive array is not effective

     [ https://issues.apache.org/jira/browse/SPARK-16902?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Michael Armbrust resolved SPARK-16902.
--------------------------------------
    Resolution: Not A Problem

The encoder that is used is picked by Scala's implicit resolution.  Just because you construct an encoder doesn't mean that we can find it (which is why it's not having any effect).

I also think the nullability here is correct.  Even though the elements cannot be null (since they are primitives), you can put a null into the value field of DataPoint.

> Custom ExpressionEncoder for primitive array is not effective
> -------------------------------------------------------------
>
>                 Key: SPARK-16902
>                 URL: https://issues.apache.org/jira/browse/SPARK-16902
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>            Reporter: Kazuaki Ishizaki
>
> Even when a programmer explicitly specifies a custom {{ExpressionEncoder}} for a primitive array, it does not seem to be used.
> The following test cases cause assertion errors. Since the custom {{ExpressionEncoder}} is used, the {{nullable}} should be {{false}} for {{value: array}}.
> {code:java}
> class Test extends QueryTest with SharedSQLContext {
>   import testImplicits._
>   test("test") {
>     val schema = new StructType()
>       .add("array", ArrayType(DoubleType, containsNull = false), false)
>     val cls = classOf[Array[Double]]
>     val inputObject = BoundReference(0, ScalaReflection.dataTypeFor[Array[Double]], false)
>     val serializer = ScalaReflection.serializerFor[Array[Double]](
>       AssertNotNull(inputObject, Seq("non null array")))
>     val deserializer = ScalaReflection.deserializerFor[Array[Double]]
>     val encoder = new ExpressionEncoder[Array[Double]](
>       schema,
>       true,
>       serializer.flatten,
>       deserializer,
>       ClassTag[Array[Double]](cls)
>     )
>     val ds1 = sparkContext.parallelize(Seq(Array(1.1, 1.2), Array(2.1, 2.2)), 1).toDS
>     ds1.count
>     val ds10 = ds1.map(e => e)(encoder)
>     ds10.show
>     ds10.printSchema
>     assert(ds10.schema.fields(0).nullable == false)
>     val ds2 = sparkContext.parallelize(
>       Seq(DataPoint(Array(1.1, 1.2), 1.0), DataPoint(Array(2.1, 2.2), 2.0)), 1).toDS
>     ds2.count
>     val ds20 = ds2.map(p => p.x)(encoder)
>     ds20.show
>     ds20.printSchema
>     assert(ds20.schema.fields(0).nullable == false)
>   }
> }
> {code}
> {code}
> +----------+
> |     value|
> +----------+
> |[1.1, 1.2]|
> |[2.1, 2.2]|
> +----------+
> root
>  |-- value: array (nullable = true)
>  |    |-- element: double (containsNull = false)
> true did not equal false
> ScalaTestFailureLocation: org.apache.spark.sql.MySuite$$anonfun$1 at (MySuite.scala:489)
> org.scalatest.exceptions.TestFailedException: true did not equal false
> 	at org.scalatest.Assertions$class.newAssertionFailedException(Assertions.scala:500)
> 	at org.scalatest.FunSuite.newAssertionFailedException(FunSuite.scala:1555)
> 	at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:466)
> 	at org.apache.spark.sql.MySuite$$anonfun$1.apply$mcV$sp(MySuite.scala:489)
> 	at org.apache.spark.sql.MySuite$$anonfun$1.apply(MySuite.scala:39)
> 	at org.apache.spark.sql.MySuite$$anonfun$1.apply(MySuite.scala:39)
> 	at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
> 	at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
> 	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
> 	at org.scalatest.Transformer.apply(Transformer.scala:22)
> 	at org.scalatest.Transformer.apply(Transformer.scala:20)
> 	at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
> 	at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:57)
> 	at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
> 	at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
> 	at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
> 	at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
> 	at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175)
> 	at org.scalatest.FunSuite.runTest(FunSuite.scala:1555)
> 	at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
> 	at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
> 	at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413)
> 	at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401)
> 	at scala.collection.immutable.List.foreach(List.scala:381)
> 	at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
> 	at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396)
> 	at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483)
> 	at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208)
> 	at org.scalatest.FunSuite.runTests(FunSuite.scala:1555)
> 	at org.scalatest.Suite$class.run(Suite.scala:1424)
> 	at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555)
> 	at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
> 	at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
> 	at org.scalatest.SuperEngine.runImpl(Engine.scala:545)
> 	at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212)
> 	at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:29)
> 	at org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:257)
> 	at org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:256)
> 	at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:29)
> 	at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:55)
> 	at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2563)
> 	at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$3.apply(Runner.scala:2557)
> 	at scala.collection.immutable.List.foreach(List.scala:381)
> 	at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:2557)
> 	at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1044)
> 	at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1043)
> 	at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:2722)
> 	at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1043)
> 	at org.scalatest.tools.Runner$.run(Runner.scala:883)
> 	at org.scalatest.tools.Runner.run(Runner.scala)
> 	at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:138)
> 	at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:498)
> 	at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147)
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org